Skip to content

Commit

Permalink
flesh out spherical isometries
Browse files Browse the repository at this point in the history
  • Loading branch information
MikaelSlevinsky committed Aug 20, 2020
1 parent 6373913 commit 278dcd8
Show file tree
Hide file tree
Showing 18 changed files with 962 additions and 55 deletions.
2 changes: 1 addition & 1 deletion Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ ASM = src/recurrence/recurrence_default.s \
src/rotations/rotations_AVX512F.s \
src/rotations/rotations_NEON.s

SRC = src/recurrence.c src/transforms.c src/rotations.c src/permute.c src/tdc.c src/drivers.c src/fftw.c
SRC = src/recurrence.c src/transforms.c src/rotations.c src/permute.c src/tdc.c src/drivers.c src/fftw.c src/isometries.c

machine := $(shell $(CC) -dumpmachine | cut -d'-' -f1)

Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ tests:
$(CC) src/ftutilities.c test/test_tdc.c $(CFLAGS) -L$(LIBDIR) -l$(LIB) $(LDFLAGS) $(LDLIBS) -o test_tdc
$(CC) src/ftutilities.c test/test_drivers.c $(CFLAGS) -L$(LIBDIR) -l$(LIB) $(LDFLAGS) $(LDLIBS) -o test_drivers
$(CC) src/ftutilities.c test/test_fftw.c $(CFLAGS) -L$(LIBDIR) -l$(LIB) $(LDFLAGS) $(LDLIBS) -o test_fftw
$(CC) src/ftutilities.c test/test_isometries.c $(CFLAGS) -L$(LIBDIR) -l$(LIB) $(LDFLAGS) $(LDLIBS) -o test_isometries

examples:
$(CC) src/ftutilities.c examples/additiontheorem.c $(CFLAGS) -L$(LIBDIR) -l$(LIB) $(LDFLAGS) $(LDLIBS) -o additiontheorem
Expand All @@ -64,6 +65,7 @@ runtests:
./test_tdc
OMP_NUM_THREADS=$(FT_NUM_THREADS) ./test_drivers 1 3 0
OMP_NUM_THREADS=$(FT_NUM_THREADS) ./test_fftw 1 3 0
OMP_NUM_THREADS=$(FT_NUM_THREADS) ./test_isometries 1 3

runexamples:
./additiontheorem
Expand Down
87 changes: 85 additions & 2 deletions src/fasttransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ void ft_clenshawf(const int n, const float * c, const int incc, const int m, flo
void ft_orthogonal_polynomial_clenshaw(const int n, const double * c, const int incc, const double * A, const double * B, const double * C, const int m, double * x, double * phi0, double * f);
void ft_orthogonal_polynomial_clenshawf(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, float * phi0, float * f);

void ft_orthogonal_polynomial_clenshawl(const int n, const long double * c, const int incc, const long double * A, const long double * B, const long double * C, const int m, long double * x, long double * phi0, long double * f);
void ft_eigen_eval(const int n, const double * c, const int incc, const double * A, const double * B, const double * C, const int m, double * x, const int sign, double * f);
void ft_eigen_evalf(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, const int sign, float * f);
void ft_eigen_evall(const int n, const long double * c, const int incc, const long double * A, const long double * B, const long double * C, const int m, long double * x, const int sign, long double * f);
#if defined(FT_QUADMATH)
#include <quadmath.h>
typedef __float128 quadruple;
void ft_orthogonal_polynomial_clenshawq(const int n, const quadruple * c, const int incc, const quadruple * A, const quadruple * B, const quadruple * C, const int m, quadruple * x, quadruple * phi0, quadruple * f);
void ft_eigen_evalq(const int n, const quadruple * c, const int incc, const quadruple * A, const quadruple * B, const quadruple * C, const int m, quadruple * x, const int sign, quadruple * f);
#endif

#include "tdc.h"
Expand Down Expand Up @@ -498,4 +500,85 @@ void ft_execute_spinsph_synthesis(const ft_spinsphere_fftw_plan * P, ft_complex
/// Execute FFTW analysis on the sphere with spin.
void ft_execute_spinsph_analysis(const ft_spinsphere_fftw_plan * P, ft_complex * X, const int N, const int M);

/*!
\brief A static struct to store an orthogonal matrix \f$Q \in \mathbb{R}^{3\times3}\f$, such that \f$Q^\top Q = I\f$.
\f$Q\f$ has column-major storage.
*/
typedef struct {
double Q[9];
} ft_orthogonal_transformation;

/*!
\brief Every orthogonal matrix \f$Q \in \mathbb{R}^{3\times3}\f$ can be decomposed as a product of \f$ZYZ\f$ Euler angles and, if necessary, a reflection \f$R\f$ about the \f$xy\f$-plane.
\f[
Q = ZYZR,
\f]
where the \f$z\f$-axis rotations are:
\f[
Z = \begin{pmatrix} c & -s & 0\\ s & c & 0\\ 0 & 0 & 1\end{pmatrix},
\f]
the \f$y\f$-axis rotation is:
\f[
Y = \begin{pmatrix} c & 0 & -s\\ 0 & 1 & 0\\ s & 0 & c\end{pmatrix},
\f]
and the potential reflection is:
\f[
R = \begin{pmatrix} 1 & 0 & 0\\ 0 & 1 & 0\\ 0 & 0 & \pm 1\end{pmatrix}.
\f]
The reflection is stored as an integer `sign` corresponding to the bottom right entry.
*/
typedef struct {
double s[3];
double c[3];
int sign;
} ft_ZYZR;

/*!
\brief A static struct to store a reflection about the plane \f$w\cdot x = 0\f$ in \f$\mathbb{R}^3\f$.
*/
typedef struct {
double w[3];
} ft_reflection;

ft_ZYZR ft_create_ZYZR(ft_orthogonal_transformation Q);

void ft_apply_ZYZR(ft_ZYZR Q, ft_orthogonal_transformation * U);

void ft_apply_reflection(ft_reflection Q, ft_orthogonal_transformation * U);

void ft_execute_sph_polar_rotation(double * A, const int N, const int M, double s, double c);

void ft_execute_sph_polar_reflection(double * A, const int N, const int M);

typedef struct {
ft_symmetric_tridiagonal_symmetric_eigen * F11;
ft_symmetric_tridiagonal_symmetric_eigen * F21;
ft_symmetric_tridiagonal_symmetric_eigen * F12;
ft_symmetric_tridiagonal_symmetric_eigen * F22;
int l;
} ft_partial_ZY_axis_exchange_factorization;

typedef struct {
ft_partial_ZY_axis_exchange_factorization ** F;
int n;
} ft_ZY_axis_exchange_factorization;

void ft_destroy_partial_ZY_axis_exchange_factorization(ft_partial_ZY_axis_exchange_factorization * F);

void ft_destroy_ZY_axis_exchange_factorization(ft_ZY_axis_exchange_factorization * F);

ft_partial_ZY_axis_exchange_factorization * ft_create_partial_ZY_axis_exchange_factorization(const int l);

ft_ZY_axis_exchange_factorization * ft_create_ZY_axis_exchange_factorization(const int n);

void ft_execute_sph_ZY_axis_exchange(ft_ZY_axis_exchange_factorization * J, double * A, const int N, const int M);

void ft_execute_sph_isometry(ft_ZY_axis_exchange_factorization * J, ft_ZYZR Q, double * A, const int N, const int M);

void ft_execute_sph_rotation(ft_ZY_axis_exchange_factorization * J, const double alpha, const double beta, const double gamma, double * A, const int N, const int M);

void ft_execute_sph_reflection(ft_ZY_axis_exchange_factorization * J, ft_reflection W, double * A, const int N, const int M);

void ft_execute_sph_orthogonal_transformation(ft_ZY_axis_exchange_factorization * J, ft_orthogonal_transformation Q, double * A, const int N, const int M);

#endif // FASTTRANSFORMS_H
30 changes: 27 additions & 3 deletions src/ftinternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ typedef struct {
#define vloadu8(v) ((double8) _mm512_loadu_pd(v))
#define vstore8(u, v) (_mm512_store_pd(u, v))
#define vstoreu8(u, v) (_mm512_storeu_pd(u, v))
#define vsqrt8(x) ((double8) _mm512_sqrt_pd(x))
#define vmovemask8(x) (((int) _mm256_movemask_pd((double4) _mm512_extractf64x4_pd(x, 0)))*((int) _mm256_movemask_pd((double4) _mm512_extractf64x4_pd(x, 1))))
#define vfma8(a, b, c) ((double8) _mm512_fmadd_pd(a, b, c))
#define vfms8(a, b, c) ((double8) _mm512_fmsub_pd(a, b, c))
#define vfmas8(a, b, c) ((double8) _mm512_fmaddsub_pd(a, b, c))
Expand All @@ -188,6 +190,8 @@ typedef struct {
#define vloadu16f(v) ((float16) _mm512_loadu_ps(v))
#define vstore16f(u, v) (_mm512_store_ps(u, v))
#define vstoreu16f(u, v) (_mm512_storeu_ps(u, v))
#define vsqrt16f(x) ((float16) _mm512_sqrt_ps(x))
#define vmovemask16f(x) (((int) _mm256_movemask_ps((float8) _mm512_extractf32x8_ps(x, 0)))*((int) _mm256_movemask_ps((float8) _mm512_extractf32x8_ps(x, 1))))
#define vfma16f(a, b, c) ((float16) _mm512_fmadd_ps(a, b, c))
#define vfms16f(a, b, c) ((float16) _mm512_fmsub_ps(a, b, c))
#define vfmas16f(a, b, c) ((float16) _mm512_fmaddsub_ps(a, b, c))
Expand All @@ -203,13 +207,17 @@ typedef struct {
#define vloadu4(v) ((double4) _mm256_loadu_pd(v))
#define vstore4(u, v) (_mm256_store_pd(u, v))
#define vstoreu4(u, v) (_mm256_storeu_pd(u, v))
#define vas4(a, b) ((double4) _mm256_addsub_pd(a, b))
#define vsqrt4(x) ((double4) _mm256_sqrt_pd(x))
#define vmovemask4(x) ((int) _mm256_movemask_pd(x))
typedef float float8 __attribute__ ((vector_size (VECTOR_SIZE_4*8)));
#define vall8f(x) ((float8) _mm256_set1_ps(x))
#define vload8f(v) ((float8) _mm256_load_ps(v))
#define vloadu8f(v) ((float8) _mm256_loadu_ps(v))
#define vstore8f(u, v) (_mm256_store_ps(u, v))
#define vstoreu8f(u, v) (_mm256_storeu_ps(u, v))
#define vas4(a, b) ((double4) _mm256_addsub_pd(a, b))
#define vsqrt8f(x) ((float8) _mm256_sqrt_ps(x))
#define vmovemask8f(x) ((int) _mm256_movemask_ps(x))
#define vas8f(a, b) ((float8) _mm256_addsub_ps(a, b))
#ifdef __FMA__
#define vfma4(a, b, c) ((double4) _mm256_fmadd_pd(a, b, c))
Expand All @@ -231,6 +239,8 @@ typedef struct {
#define vloadu2(v) ((double2) _mm_loadu_pd(v))
#define vstore2(u, v) (_mm_store_pd(u, v))
#define vstoreu2(u, v) (_mm_storeu_pd(u, v))
#define vsqrt2(x) ((double2) _mm_sqrt_pd(x))
#define vmovemask2(x) ((int) _mm_movemask_pd(x))
#ifdef __FMA__
#define vfma2(a, b, c) ((double2) _mm_fmadd_pd(a, b, c))
#define vfms2(a, b, c) ((double2) _mm_fmsub_pd(a, b, c))
Expand All @@ -248,6 +258,8 @@ typedef struct {
#define vloadu4f(v) ((float4) _mm_loadu_ps(v))
#define vstore4f(u, v) (_mm_store_ps(u, v))
#define vstoreu4f(u, v) (_mm_storeu_ps(u, v))
#define vsqrt4f(x) ((float4) _mm_sqrt_ps(x))
#define vmovemask4f(x) ((int) _mm_movemask_ps(x))
#ifdef __FMA__
#define vfma4f(a, b, c) ((float4) _mm_fmadd_ps(a, b, c))
#define vfms4f(a, b, c) ((float4) _mm_fmsub_ps(a, b, c))
Expand Down Expand Up @@ -308,9 +320,21 @@ void orthogonal_polynomial_clenshaw_AVX_FMAf(const int n, const float * c, const
void orthogonal_polynomial_clenshaw_AVX512Ff(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, float * phi0, float * f);
void orthogonal_polynomial_clenshaw_NEONf(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, float * phi0, float * f);

void orthogonal_polynomial_clenshaw_defaultl(const int n, const long double * c, const int incc, const long double * A, const long double * B, const long double * C, const int m, long double * x, long double * phi0, long double * f);
void eigen_eval_default(const int n, const double * c, const int incc, const double * A, const double * B, const double * C, const int m, double * x, const int sign, double * f);
void eigen_eval_SSE2(const int n, const double * c, const int incc, const double * A, const double * B, const double * C, const int m, double * x, const int sign, double * f);
void eigen_eval_AVX(const int n, const double * c, const int incc, const double * A, const double * B, const double * C, const int m, double * x, const int sign, double * f);
void eigen_eval_AVX_FMA(const int n, const double * c, const int incc, const double * A, const double * B, const double * C, const int m, double * x, const int sign, double * f);
void eigen_eval_AVX512F(const int n, const double * c, const int incc, const double * A, const double * B, const double * C, const int m, double * x, const int sign, double * f);

void eigen_eval_defaultf(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, const int sign, float * f);
void eigen_eval_SSEf(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, const int sign, float * f);
void eigen_eval_AVXf(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, const int sign, float * f);
void eigen_eval_AVX_FMAf(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, const int sign, float * f);
void eigen_eval_AVX512Ff(const int n, const float * c, const int incc, const float * A, const float * B, const float * C, const int m, float * x, const int sign, float * f);

void eigen_eval_defaultl(const int n, const long double * c, const int incc, const long double * A, const long double * B, const long double * C, const int m, long double * x, const int sign, long double * f);
#if defined(FT_QUADMATH)
void orthogonal_polynomial_clenshaw_defaultq(const int n, const quadruple * c, const int incc, const quadruple * A, const quadruple * B, const quadruple * C, const int m, quadruple * x, quadruple * phi0, quadruple * f);
void eigen_eval_defaultq(const int n, const quadruple * c, const int incc, const quadruple * A, const quadruple * B, const quadruple * C, const int m, quadruple * x, const int sign, quadruple * f);
#endif

double * plan_legendre_to_chebyshev(const int normleg, const int normcheb, const int n);
Expand Down
Loading

0 comments on commit 278dcd8

Please sign in to comment.