From 87c83dd7307a261b8ef5f3182ce56323bbb69111 Mon Sep 17 00:00:00 2001
From: Nicholas Frechette <zeno490@gmail.com>
Date: Wed, 26 Mar 2025 21:56:25 -0400
Subject: [PATCH 1/6] refactor(tests): rename test cpp

---
 tests/sources/{test_macros.cpp => test_macros_matrix.cpp} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tests/sources/{test_macros.cpp => test_macros_matrix.cpp} (99%)

diff --git a/tests/sources/test_macros.cpp b/tests/sources/test_macros_matrix.cpp
similarity index 99%
rename from tests/sources/test_macros.cpp
rename to tests/sources/test_macros_matrix.cpp
index c74ec011..59794cfb 100644
--- a/tests/sources/test_macros.cpp
+++ b/tests/sources/test_macros_matrix.cpp
@@ -27,7 +27,7 @@
 #include <rtm/macros.h>
 #include <rtm/vector4f.h>
 
-TEST_CASE("macros", "[math][macros]")
+TEST_CASE("macros matrix", "[math][macros][matrix]")
 {
 	const float threshold = 0.0F;	// Result must be binary exact!
 

From 14685a8b2943850ab08736d65e7903a379469365 Mon Sep 17 00:00:00 2001
From: Nicholas Frechette <zeno490@gmail.com>
Date: Wed, 26 Mar 2025 22:08:45 -0400
Subject: [PATCH 2/6] feat(tests): add matrixd macro tests

---
 tests/sources/test_macros_matrix.cpp | 110 ++++++++++++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp
index 59794cfb..58f9f44c 100644
--- a/tests/sources/test_macros_matrix.cpp
+++ b/tests/sources/test_macros_matrix.cpp
@@ -25,9 +25,10 @@
 #include "catch2.impl.h"
 
 #include <rtm/macros.h>
+#include <rtm/vector4d.h>
 #include <rtm/vector4f.h>
 
-TEST_CASE("macros matrix", "[math][macros][matrix]")
+TEST_CASE("macros matrixf", "[math][macros][matrix]")
 {
 	const float threshold = 0.0F;	// Result must be binary exact!
 
@@ -127,3 +128,110 @@ TEST_CASE("macros matrix", "[math][macros][matrix]")
 		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0F, 21.0F, 22.0F), xxx, threshold));
 	}
 }
+
+TEST_CASE("macros matrixd", "[math][macros][matrix]")
+{
+	const double threshold = 0.0;	// Result must be binary exact!
+
+	{
+		rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0);
+		rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0);
+		rtm::vector4d xyz2 = rtm::vector_set(7.0, 8.0, 9.0);
+
+		rtm::vector4d xxx;
+		rtm::vector4d yyy;
+		rtm::vector4d zzz;
+		RTM_MATRIXD_TRANSPOSE_3X3(xyz0, xyz1, xyz2, xxx, yyy, zzz);
+
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xxx, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), yyy, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), zzz, threshold));
+
+		// Test when input == output
+		RTM_MATRIXD_TRANSPOSE_3X3(xyz0, xyz1, xyz2, xyz0, xyz1, xyz2);
+
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xyz0, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), xyz1, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyz2, threshold));
+	}
+
+#if 0	// not implemented yet
+	{
+		rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
+		rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
+		rtm::vector4d xyzw2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0);
+		rtm::vector4d xyzw3 = rtm::vector_set(10.0, 11.0, 12.0, 23.0);
+
+		rtm::vector4d xxxx;
+		rtm::vector4d yyyy;
+		rtm::vector4d zzzz;
+		rtm::vector4d wwww;
+		RTM_MATRIXD_TRANSPOSE_4X4(xyzw0, xyzw1, xyzw2, xyzw3, xxxx, yyyy, zzzz, wwww);
+
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xxxx, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), yyyy, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), zzzz, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), wwww, threshold));
+
+		// Test when input == output
+		RTM_MATRIXD_TRANSPOSE_4X4(xyzw0, xyzw1, xyzw2, xyzw3, xyzw0, xyzw1, xyzw2, xyzw3);
+
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xyzw0, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyzw1, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyzw2, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), xyzw3, threshold));
+	}
+#endif
+
+#if 0	// not implemented yet
+	{
+		rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
+		rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
+		rtm::vector4d xyz2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0);
+		rtm::vector4d xyz3 = rtm::vector_set(10.0, 11.0, 12.0, 23.0);
+
+		rtm::vector4d xxxx;
+		rtm::vector4d yyyy;
+		rtm::vector4d zzzz;
+		RTM_MATRIXD_TRANSPOSE_4X3(xyz0, xyz1, xyz2, xyz3, xxxx, yyyy, zzzz);
+
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xxxx, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), yyyy, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), zzzz, threshold));
+
+		// Test when input == output
+		RTM_MATRIXD_TRANSPOSE_4X3(xyz0, xyz1, xyz2, xyz3, xyz0, xyz1, xyz2);
+
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(1.0, 4.0, 7.0, 10.0), xyz0, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyz1, threshold));
+		CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyz2, threshold));
+	}
+#endif
+
+#if 0	// not implemented yet
+	{
+		rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
+		rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
+		rtm::vector4d xyzw2 = rtm::vector_set(7.0, 8.0, 9.0, 22.0);
+
+		rtm::vector4d xxx;
+		rtm::vector4d yyy;
+		rtm::vector4d zzz;
+		rtm::vector4d www;
+		RTM_MATRIXD_TRANSPOSE_3X4(xyzw0, xyzw1, xyzw2, xxx, yyy, zzz, www);
+
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xxx, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), yyy, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), zzz, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), www, threshold));
+
+		// Test when input == output
+		RTM_MATRIXD_TRANSPOSE_3X4(xyzw0, xyzw1, xyzw2, xyzw0, xyzw1, xyzw2, xxx);
+
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(1.0, 4.0, 7.0), xyzw0, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(2.0, 5.0, 8.0), xyzw1, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyzw2, threshold));
+		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), xxx, threshold));
+	}
+#endif
+}

From c4d41365aa62839d8fa720a0664ff7ab97bd4ce0 Mon Sep 17 00:00:00 2001
From: Nicholas Frechette <zeno490@gmail.com>
Date: Wed, 26 Mar 2025 22:13:18 -0400
Subject: [PATCH 3/6] feat(macros): add matrix transpose 2x2 macro

---
 includes/rtm/impl/macros.matrix.impl.h | 66 ++++++++++++++++++++++++++
 tests/sources/test_macros_matrix.cpp   | 36 ++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h
index 631e3612..9da1ac72 100644
--- a/includes/rtm/impl/macros.matrix.impl.h
+++ b/includes/rtm/impl/macros.matrix.impl.h
@@ -185,6 +185,72 @@ RTM_IMPL_FILE_PRAGMA_PUSH
 		} while(0)
 #endif
 
+#if defined(RTM_NEON_INTRINSICS)
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 2x2 matrix.
+	// All inputs and outputs must be rtm::vector4f.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
+		do { \
+			const float32x4_t x0x1y0y1 = vzip1q_f32((input_xy0), (input_xy1)); \
+			(output_xx) = x0x1y0y1; \
+			(output_yy) = vextq_f32(x0x1y0y1, x0x1y0y1, 2); \
+		} while(0)
+#elif defined(RTM_SSE2_INTRINSICS)
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 2x2 matrix.
+	// All inputs and outputs must be rtm::vector4f.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
+		do { \
+			const __m128 x0x1y0y1 = _mm_unpacklo_ps((input_xy0), (input_xy1)); \
+			(output_xx) = x0x1y0y1; \
+			(output_yy) = _mm_movehl_ps(x0x1y0y1, x0x1y0y1); \
+		} while(0)
+#else
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 2x2 matrix.
+	// All inputs and outputs must be rtm::vector4f.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXF_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
+		do { \
+			const float input_x0 = (input_xy0).x; \
+			const float input_y0 = (input_xy0).y; \
+			const float input_x1 = (input_xy1).x; \
+			const float input_y1 = (input_xy1).y; \
+			(output_xx) = RTM_IMPL_NAMESPACE::vector4f { input_x0, input_x1, input_x0, input_x1 }; \
+			(output_yy) = RTM_IMPL_NAMESPACE::vector4f { input_y0, input_y1, input_y0, input_y1 }; \
+		} while(0)
+#endif
+
+#if defined(RTM_SSE2_INTRINSICS)
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 2x2 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
+		do { \
+			const __m128d x0x1 = _mm_unpacklo_pd((input_xy0).xy, (input_xy1).xy); \
+			const __m128d y0y1 = _mm_unpackhi_pd((input_xy0).xy, (input_xy1).xy); \
+			(output_xx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x0x1 }; \
+			(output_yy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y0y1 }; \
+		} while(0)
+#else
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 2x2 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_2X2(input_xy0, input_xy1, output_xx, output_yy) \
+		do { \
+			const double input_x0 = (input_xy0).x; \
+			const double input_y0 = (input_xy0).y; \
+			const double input_x1 = (input_xy1).x; \
+			const double input_y1 = (input_xy1).y; \
+			(output_xx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x0, input_x1 }; \
+			(output_yy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y0, input_y1 }; \
+		} while(0)
+#endif
+
 #if defined(RTM_NEON_INTRINSICS)
 	//////////////////////////////////////////////////////////////////////////
 	// Transposes a 4x3 matrix.
diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp
index 58f9f44c..76272819 100644
--- a/tests/sources/test_macros_matrix.cpp
+++ b/tests/sources/test_macros_matrix.cpp
@@ -32,6 +32,24 @@ TEST_CASE("macros matrixf", "[math][macros][matrix]")
 {
 	const float threshold = 0.0F;	// Result must be binary exact!
 
+	{
+		rtm::vector4f xy0 = rtm::vector_set(1.0F, 2.0F, 3.0F);
+		rtm::vector4f xy1 = rtm::vector_set(4.0F, 5.0F, 6.0F);
+
+		rtm::vector4f xx;
+		rtm::vector4f yy;
+		RTM_MATRIXF_TRANSPOSE_2X2(xy0, xy1, xx, yy);
+
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0F, 4.0F, 7.0F), xx, threshold));
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0F, 5.0F, 8.0F), yy, threshold));
+
+		// Test when input == output
+		RTM_MATRIXF_TRANSPOSE_2X2(xy0, xy1, xy0, xy1);
+
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0F, 4.0F, 7.0F), xy0, threshold));
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0F, 5.0F, 8.0F), xy1, threshold));
+	}
+
 	{
 		rtm::vector4f xyz0 = rtm::vector_set(1.0F, 2.0F, 3.0F);
 		rtm::vector4f xyz1 = rtm::vector_set(4.0F, 5.0F, 6.0F);
@@ -133,6 +151,24 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]")
 {
 	const double threshold = 0.0;	// Result must be binary exact!
 
+	{
+		rtm::vector4d xy0 = rtm::vector_set(1.0, 2.0, 3.0);
+		rtm::vector4d xy1 = rtm::vector_set(4.0, 5.0, 6.0);
+
+		rtm::vector4d xx;
+		rtm::vector4d yy;
+		RTM_MATRIXD_TRANSPOSE_2X2(xy0, xy1, xx, yy);
+
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0, 4.0, 7.0), xx, threshold));
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0, 5.0, 8.0), yy, threshold));
+
+		// Test when input == output
+		RTM_MATRIXD_TRANSPOSE_2X2(xy0, xy1, xy0, xy1);
+
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(1.0, 4.0, 7.0), xy0, threshold));
+		CHECK(rtm::vector_all_near_equal2(rtm::vector_set(2.0, 5.0, 8.0), xy1, threshold));
+	}
+
 	{
 		rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0);
 		rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0);

From da15bd85877782bbec4b88e956101e6f7f265a20 Mon Sep 17 00:00:00 2001
From: Nicholas Frechette <zeno490@gmail.com>
Date: Wed, 26 Mar 2025 22:22:13 -0400
Subject: [PATCH 4/6] feat(macros): add matrixd 4x4 transpose

---
 includes/rtm/impl/macros.matrix.impl.h | 50 ++++++++++++++++++++++++++
 tests/sources/test_macros_matrix.cpp   |  2 --
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h
index 9da1ac72..853289ed 100644
--- a/includes/rtm/impl/macros.matrix.impl.h
+++ b/includes/rtm/impl/macros.matrix.impl.h
@@ -97,6 +97,56 @@ RTM_IMPL_FILE_PRAGMA_PUSH
 		} while(0)
 #endif
 
+#if defined(RTM_SSE2_INTRINSICS)
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 4x4 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_4X4(input_xyzw0, input_xyzw1, input_xyzw2, input_xyzw3, output_xxxx, output_yyyy, output_zzzz, output_wwww) \
+		do { \
+			const __m128d x0x1 = _mm_unpacklo_pd(input_xyzw0.xy, input_xyzw1.xy); \
+			const __m128d y0y1 = _mm_unpackhi_pd(input_xyzw0.xy, input_xyzw1.xy); \
+			const __m128d x2x3 = _mm_unpacklo_pd(input_xyzw2.xy, input_xyzw3.xy); \
+			const __m128d y2y3 = _mm_unpackhi_pd(input_xyzw2.xy, input_xyzw3.xy); \
+			const __m128d z0z1 = _mm_unpacklo_pd(input_xyzw0.zw, input_xyzw1.zw); \
+			const __m128d w0w1 = _mm_unpackhi_pd(input_xyzw0.zw, input_xyzw1.zw); \
+			const __m128d z2z3 = _mm_unpacklo_pd(input_xyzw2.zw, input_xyzw3.zw); \
+			const __m128d w2w3 = _mm_unpackhi_pd(input_xyzw2.zw, input_xyzw3.zw); \
+			(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x2x3 }; \
+			(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y3 }; \
+			(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, z2z3 }; \
+			(output_wwww) = RTM_IMPL_NAMESPACE::vector4d { w0w1, w2w3 }; \
+		} while(0)
+#else
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 4x4 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_4X4(input_xyzw0, input_xyzw1, input_xyzw2, input_xyzw3, output_xxxx, output_yyyy, output_zzzz, output_wwww) \
+		do { \
+			const double input_x0 = (input_xyzw0).x; \
+			const double input_y0 = (input_xyzw0).y; \
+			const double input_z0 = (input_xyzw0).z; \
+			const double input_w0 = (input_xyzw0).w; \
+			const double input_x1 = (input_xyzw1).x; \
+			const double input_y1 = (input_xyzw1).y; \
+			const double input_z1 = (input_xyzw1).z; \
+			const double input_w1 = (input_xyzw1).w; \
+			const double input_x2 = (input_xyzw2).x; \
+			const double input_y2 = (input_xyzw2).y; \
+			const double input_z2 = (input_xyzw2).z; \
+			const double input_w2 = (input_xyzw2).w; \
+			const double input_x3 = (input_xyzw3).x; \
+			const double input_y3 = (input_xyzw3).y; \
+			const double input_z3 = (input_xyzw3).z; \
+			const double input_w3 = (input_xyzw3).w; \
+			(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x3 }; \
+			(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y3 }; \
+			(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z3 }; \
+			(output_wwww) = RTM_IMPL_NAMESPACE::vector4d { input_w0, input_w1, input_w2, input_w3 }; \
+		} while(0)
+#endif
+
 #if defined(RTM_NEON_INTRINSICS)
 	//////////////////////////////////////////////////////////////////////////
 	// Transposes a 3x3 matrix.
diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp
index 76272819..d47e932f 100644
--- a/tests/sources/test_macros_matrix.cpp
+++ b/tests/sources/test_macros_matrix.cpp
@@ -191,7 +191,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]")
 		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyz2, threshold));
 	}
 
-#if 0	// not implemented yet
 	{
 		rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
 		rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
@@ -217,7 +216,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]")
 		CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyzw2, threshold));
 		CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), xyzw3, threshold));
 	}
-#endif
 
 #if 0	// not implemented yet
 	{

From 1766d960200c8bf17a5e18f6d4195fb284f7cabc Mon Sep 17 00:00:00 2001
From: Nicholas Frechette <zeno490@gmail.com>
Date: Wed, 26 Mar 2025 22:26:34 -0400
Subject: [PATCH 5/6] feat(macros): add matrixd 4x3 transpose

---
 includes/rtm/impl/macros.matrix.impl.h | 42 ++++++++++++++++++++++++++
 tests/sources/test_macros_matrix.cpp   |  2 --
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h
index 853289ed..86f8c242 100644
--- a/includes/rtm/impl/macros.matrix.impl.h
+++ b/includes/rtm/impl/macros.matrix.impl.h
@@ -356,6 +356,48 @@ RTM_IMPL_FILE_PRAGMA_PUSH
 		} while(0)
 #endif
 
+#if defined(RTM_SSE2_INTRINSICS)
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 4x3 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_4X3(input_xyz0, input_xyz1, input_xyz2, input_xyz3, output_xxxx, output_yyyy, output_zzzz) \
+		do { \
+			const __m128d x0x1 = _mm_unpacklo_pd(input_xyz0.xy, input_xyz1.xy); \
+			const __m128d y0y1 = _mm_unpackhi_pd(input_xyz0.xy, input_xyz1.xy); \
+			const __m128d x2x3 = _mm_unpacklo_pd(input_xyz2.xy, input_xyz3.xy); \
+			const __m128d y2y3 = _mm_unpackhi_pd(input_xyz2.xy, input_xyz3.xy); \
+			const __m128d z0z1 = _mm_unpacklo_pd(input_xyz0.zw, input_xyz1.zw); \
+			const __m128d z2z3 = _mm_unpacklo_pd(input_xyz2.zw, input_xyz3.zw); \
+			(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, x2x3 }; \
+			(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y3 }; \
+			(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, z2z3 }; \
+		} while(0)
+#else
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 4x3 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_4X3(input_xyz0, input_xyz1, input_xyz2, input_xyz3, output_xxxx, output_yyyy, output_zzzz) \
+		do { \
+			const double input_x0 = (input_xyz0).x; \
+			const double input_y0 = (input_xyz0).y; \
+			const double input_z0 = (input_xyz0).z; \
+			const double input_x1 = (input_xyz1).x; \
+			const double input_y1 = (input_xyz1).y; \
+			const double input_z1 = (input_xyz1).z; \
+			const double input_x2 = (input_xyz2).x; \
+			const double input_y2 = (input_xyz2).y; \
+			const double input_z2 = (input_xyz2).z; \
+			const double input_x3 = (input_xyz3).x; \
+			const double input_y3 = (input_xyz3).y; \
+			const double input_z3 = (input_xyz3).z; \
+			(output_xxxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x3 }; \
+			(output_yyyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y3 }; \
+			(output_zzzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z3 }; \
+		} while(0)
+#endif
+
 #if defined(RTM_NEON_INTRINSICS)
 	//////////////////////////////////////////////////////////////////////////
 	// Transposes a 3x4 matrix.
diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp
index d47e932f..412e2122 100644
--- a/tests/sources/test_macros_matrix.cpp
+++ b/tests/sources/test_macros_matrix.cpp
@@ -217,7 +217,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]")
 		CHECK(rtm::vector_all_near_equal(rtm::vector_set(20.0, 21.0, 22.0, 23.0), xyzw3, threshold));
 	}
 
-#if 0	// not implemented yet
 	{
 		rtm::vector4d xyz0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
 		rtm::vector4d xyz1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
@@ -240,7 +239,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]")
 		CHECK(rtm::vector_all_near_equal(rtm::vector_set(2.0, 5.0, 8.0, 11.0), xyz1, threshold));
 		CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyz2, threshold));
 	}
-#endif
 
 #if 0	// not implemented yet
 	{

From d9a2c295b9b90ec7653091285bd86a993286178e Mon Sep 17 00:00:00 2001
From: Nicholas Frechette <zeno490@gmail.com>
Date: Wed, 26 Mar 2025 22:33:23 -0400
Subject: [PATCH 6/6] feat(macros): add matrixd 3x4 transpose

---
 includes/rtm/impl/macros.matrix.impl.h | 44 ++++++++++++++++++++++++++
 tests/sources/test_macros_matrix.cpp   |  2 --
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/includes/rtm/impl/macros.matrix.impl.h b/includes/rtm/impl/macros.matrix.impl.h
index 86f8c242..d7f3bc84 100644
--- a/includes/rtm/impl/macros.matrix.impl.h
+++ b/includes/rtm/impl/macros.matrix.impl.h
@@ -455,4 +455,48 @@ RTM_IMPL_FILE_PRAGMA_PUSH
 		} while(0)
 #endif
 
+#if defined(RTM_SSE2_INTRINSICS)
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 3x4 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_3X4(input_xyzw0, input_xyzw1, input_xyzw2, output_xxx, output_yyy, output_zzz, output_www) \
+		do { \
+			const __m128d x0x1 = _mm_unpacklo_pd(input_xyzw0.xy, input_xyzw1.xy); \
+			const __m128d y0y1 = _mm_unpackhi_pd(input_xyzw0.xy, input_xyzw1.xy); \
+			const __m128d y2y2 = _mm_unpackhi_pd(input_xyzw2.xy, input_xyzw2.xy); \
+			const __m128d z0z1 = _mm_unpacklo_pd(input_xyzw0.zw, input_xyzw1.zw); \
+			const __m128d w0w1 = _mm_unpackhi_pd(input_xyzw0.zw, input_xyzw1.zw); \
+			const __m128d w2w2 = _mm_unpackhi_pd(input_xyzw2.zw, input_xyzw2.zw); \
+			(output_xxx) = RTM_IMPL_NAMESPACE::vector4d { x0x1, input_xyzw2.xy }; \
+			(output_yyy) = RTM_IMPL_NAMESPACE::vector4d { y0y1, y2y2 }; \
+			(output_zzz) = RTM_IMPL_NAMESPACE::vector4d { z0z1, input_xyzw2.zw }; \
+			(output_www) = RTM_IMPL_NAMESPACE::vector4d { w0w1, w2w2 }; \
+		} while(0)
+#else
+	//////////////////////////////////////////////////////////////////////////
+	// Transposes a 3x4 matrix.
+	// All inputs and outputs must be rtm::vector4d.
+	//////////////////////////////////////////////////////////////////////////
+	#define RTM_MATRIXD_TRANSPOSE_3X4(input_xyzw0, input_xyzw1, input_xyzw2, output_xxx, output_yyy, output_zzz, output_www) \
+		do { \
+			const double input_x0 = (input_xyzw0).x; \
+			const double input_y0 = (input_xyzw0).y; \
+			const double input_z0 = (input_xyzw0).z; \
+			const double input_w0 = (input_xyzw0).w; \
+			const double input_x1 = (input_xyzw1).x; \
+			const double input_y1 = (input_xyzw1).y; \
+			const double input_z1 = (input_xyzw1).z; \
+			const double input_w1 = (input_xyzw1).w; \
+			const double input_x2 = (input_xyzw2).x; \
+			const double input_y2 = (input_xyzw2).y; \
+			const double input_z2 = (input_xyzw2).z; \
+			const double input_w2 = (input_xyzw2).w; \
+			(output_xxx) = RTM_IMPL_NAMESPACE::vector4d { input_x0, input_x1, input_x2, input_x2 }; \
+			(output_yyy) = RTM_IMPL_NAMESPACE::vector4d { input_y0, input_y1, input_y2, input_y2 }; \
+			(output_zzz) = RTM_IMPL_NAMESPACE::vector4d { input_z0, input_z1, input_z2, input_z2 }; \
+			(output_www) = RTM_IMPL_NAMESPACE::vector4d { input_w0, input_w1, input_w2, input_w2 }; \
+		} while(0)
+#endif
+
 RTM_IMPL_FILE_PRAGMA_POP
diff --git a/tests/sources/test_macros_matrix.cpp b/tests/sources/test_macros_matrix.cpp
index 412e2122..0874ee26 100644
--- a/tests/sources/test_macros_matrix.cpp
+++ b/tests/sources/test_macros_matrix.cpp
@@ -240,7 +240,6 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]")
 		CHECK(rtm::vector_all_near_equal(rtm::vector_set(3.0, 6.0, 9.0, 12.0), xyz2, threshold));
 	}
 
-#if 0	// not implemented yet
 	{
 		rtm::vector4d xyzw0 = rtm::vector_set(1.0, 2.0, 3.0, 20.0);
 		rtm::vector4d xyzw1 = rtm::vector_set(4.0, 5.0, 6.0, 21.0);
@@ -265,5 +264,4 @@ TEST_CASE("macros matrixd", "[math][macros][matrix]")
 		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(3.0, 6.0, 9.0), xyzw2, threshold));
 		CHECK(rtm::vector_all_near_equal3(rtm::vector_set(20.0, 21.0, 22.0), xxx, threshold));
 	}
-#endif
 }