diff --git a/src/fasttransforms.h b/src/fasttransforms.h index 75c75a69..431d35ff 100644 --- a/src/fasttransforms.h +++ b/src/fasttransforms.h @@ -195,6 +195,12 @@ void ft_mpfr_trmv(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t * x, mpfr_rnd_t void ft_mpfr_trsv(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t * x, mpfr_rnd_t rnd); void ft_mpfr_trmm(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t * B, int LDB, int N, mpfr_rnd_t rnd); void ft_mpfr_trsm(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t * B, int LDB, int N, mpfr_rnd_t rnd); +// C -- Julia interoperability. Julia `BigFloat` does not have the same size as `mpfr_t`. +// So we give all Julia-owned data its own address, and dereference to retrieve the number. +void ft_mpfr_trmv_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** x, mpfr_rnd_t rnd); +void ft_mpfr_trsv_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** x, mpfr_rnd_t rnd); +void ft_mpfr_trmm_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** B, int LDB, int N, mpfr_rnd_t rnd); +void ft_mpfr_trsm_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** B, int LDB, int N, mpfr_rnd_t rnd); /// A multi-precision version of \ref ft_plan_legendre_to_chebyshev that returns a dense array of connection coefficients. mpfr_t * ft_mpfr_plan_legendre_to_chebyshev(const int normleg, const int normcheb, const int n, mpfr_prec_t prec, mpfr_rnd_t rnd); diff --git a/src/transforms_mpfr.c b/src/transforms_mpfr.c index 5094cadb..10690d38 100644 --- a/src/transforms_mpfr.c +++ b/src/transforms_mpfr.c @@ -67,6 +67,60 @@ void ft_mpfr_trsm(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t * B, int LDB, i ft_mpfr_trsv(TRANS, n, A, LDA, B+j*LDB, rnd); } +// x ← A*x, x ← Aᵀ*x +void ft_mpfr_trmv_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** x, mpfr_rnd_t rnd) { + if (TRANS == 'N') { + for (int j = 0; j < n; j++) { + for (int i = 0; i < j; i++) + mpfr_fma(x[i][0], A[i+j*LDA], x[j][0], x[i][0], rnd); + mpfr_mul(x[j][0], A[j+j*LDA], x[j][0], rnd); + } + } + else if (TRANS == 'T') { + for (int i = n-1; i >= 0; i--) { + mpfr_mul(x[i][0], A[i+i*LDA], x[i][0], rnd); + for (int j = i-1; j >= 0; j--) + mpfr_fma(x[i][0], A[j+i*LDA], x[j][0], x[i][0], rnd); + } + } +} + +// x ← A⁻¹*x, x ← A⁻ᵀ*x +void ft_mpfr_trsv_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** x, mpfr_rnd_t rnd) { + if (TRANS == 'N') { + for (int j = n-1; j >= 0; j--) { + mpfr_div(x[j][0], x[j][0], A[j+j*LDA], rnd); + for (int i = 0; i < j; i++) { + mpfr_fms(x[i][0], A[i+j*LDA], x[j][0], x[i][0], rnd); + mpfr_neg(x[i][0], x[i][0], rnd); + } + } + } + else if (TRANS == 'T') { + for (int i = 0; i < n; i++) { + for (int j = 0; j < i; j++) { + mpfr_fms(x[i][0], A[j+i*LDA], x[j][0], x[i][0], rnd); + mpfr_neg(x[i][0], x[i][0], rnd); + } + mpfr_div(x[i][0], x[i][0], A[i+i*LDA], rnd); + } + } +} + +// B ← A*B, B ← Aᵀ*B +void ft_mpfr_trmm_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** B, int LDB, int N, mpfr_rnd_t rnd) { + #pragma omp parallel for + for (int j = 0; j < N; j++) + ft_mpfr_trmv_ptr(TRANS, n, A, LDA, B+j*LDB, rnd); +} + +// B ← A⁻¹*B, B ← A⁻ᵀ*B +void ft_mpfr_trsm_ptr(char TRANS, int n, mpfr_t * A, int LDA, mpfr_t ** B, int LDB, int N, mpfr_rnd_t rnd) { + #pragma omp parallel for + for (int j = 0; j < N; j++) + ft_mpfr_trsv_ptr(TRANS, n, A, LDA, B+j*LDB, rnd); +} + ft_mpfr_triangular_banded * ft_mpfr_calloc_triangular_banded(const int n, const int b, mpfr_prec_t prec) { mpfr_t * data = malloc(n*(b+1)*sizeof(mpfr_t)); for (int j = 0; j < n; j++)