Skip to content

Commit

Permalink
fix: nothing
Browse files Browse the repository at this point in the history
  • Loading branch information
NoNaeAbC committed Apr 3, 2021
1 parent e1ef4dc commit ce36428
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 227 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ cmake-build-debug
*.html
*.js
*.wasm
*.out

samples/mandelbrot

Expand Down
309 changes: 155 additions & 154 deletions amathlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,6 @@


#endif

// Defining AML_USE_GMP also switches on AML_USE_AML_NUMBER, unless the build
// has already defined it.
#ifdef AML_USE_GMP
#ifndef AML_USE_AML_NUMBER
#define AML_USE_AML_NUMBER
#endif
#endif
Expand Down Expand Up @@ -1090,159 +1089,6 @@ class AML_PREFIX(VectorU8_8D) {

#include "amllahead.h"

// Complex-array configuration macros.
//
// Exactly one branch of the chain below is taken, chosen by the SIMD feature
// macro that is defined (USE_AVX512, USE_AVX, USE_SSE / USE_NEON /
// USE_WASM_SIMD, AML_USE_ARRAY_STRICT, or none of them). Each branch defines,
// for both the 64-bit and the 32-bit complex family and for each of the
// MAX / IDEAL / MIN tiers:
//   <TIER>_COMPLEX_<BITS>_SIZE         element count of the array type
//   <TIER>_COMPLEX_<BITS>_TYPE         name of the complex array type
//   <TIER>_COMPLEX_<BITS>_MASK_TYPE    name of the matching mask type
//   <TIER>_COMPLEX_<BITS>_VECTOR_TYPE  name of the matching vector type
// COMPLEX_DEFINITIONS marks that a selection has been made.

#if defined(USE_AVX512) && !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS

#define MAX_COMPLEX_64_SIZE 8
#define MAX_COMPLEX_64_TYPE Array8Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector8D_64
#define IDEAL_COMPLEX_64_SIZE 8
#define IDEAL_COMPLEX_64_TYPE Array8Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_8D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector8D_64
#define MIN_COMPLEX_64_SIZE 4
#define MIN_COMPLEX_64_TYPE Array4Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_4D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector4D_64

#define MAX_COMPLEX_32_SIZE 8
#define MAX_COMPLEX_32_TYPE Array8Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define IDEAL_COMPLEX_32_SIZE 8
#define IDEAL_COMPLEX_32_TYPE Array8Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_8D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define MIN_COMPLEX_32_SIZE 4
#define MIN_COMPLEX_32_TYPE Array4Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_4D
// 32-bit family: use the _32 vector type, as the USE_AVX branch does.
#define MIN_COMPLEX_32_VECTOR_TYPE Vector4D_32


#elif defined(USE_AVX) && !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS

#define MAX_COMPLEX_64_SIZE 8
#define MAX_COMPLEX_64_TYPE Array8Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector8D_64
#define IDEAL_COMPLEX_64_SIZE 4
#define IDEAL_COMPLEX_64_TYPE Array4Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_4D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector4D_64
#define MIN_COMPLEX_64_SIZE 2
#define MIN_COMPLEX_64_TYPE Array2Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector2D_64

#define MAX_COMPLEX_32_SIZE 8
#define MAX_COMPLEX_32_TYPE Array8Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define IDEAL_COMPLEX_32_SIZE 8
#define IDEAL_COMPLEX_32_TYPE Array8Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_8D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define MIN_COMPLEX_32_SIZE 4
#define MIN_COMPLEX_32_TYPE Array4Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_4D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector4D_32


// The three feature tests are grouped explicitly: `&&` binds tighter than
// `||`, so without the parentheses the COMPLEX_DEFINITIONS / USE_CUDA guards
// would apply to USE_WASM_SIMD only.
// NOTE(review): builds that define USE_CUDA together with USE_SSE or USE_NEON
// no longer enter this branch -- confirm the CUDA path supplies its own
// definitions.
#elif (defined(USE_SSE) || defined(USE_NEON) || defined(USE_WASM_SIMD)) && !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS

#define MAX_COMPLEX_64_SIZE 4
#define MAX_COMPLEX_64_TYPE Array4Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_4D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector4D_64
#define IDEAL_COMPLEX_64_SIZE 2
#define IDEAL_COMPLEX_64_TYPE Array2Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_2D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector2D_64
#define MIN_COMPLEX_64_SIZE 1
#define MIN_COMPLEX_64_TYPE Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_1D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector1D_64


#define MAX_COMPLEX_32_SIZE 8
#define MAX_COMPLEX_32_TYPE Array8Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define IDEAL_COMPLEX_32_SIZE 4
#define IDEAL_COMPLEX_32_TYPE Array4Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_4D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector4D_32
#define MIN_COMPLEX_32_SIZE 2
#define MIN_COMPLEX_32_TYPE Array2Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector2D_32

// NOTE(review): unlike every other branch, this one neither tests nor defines
// COMPLEX_DEFINITIONS and does not look at USE_CUDA -- confirm that is
// intended.
#elif defined(AML_USE_ARRAY_STRICT)

#define MAX_COMPLEX_64_SIZE 2
#define MAX_COMPLEX_64_TYPE Array2Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_2D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector2D_64
#define IDEAL_COMPLEX_64_SIZE 2
#define IDEAL_COMPLEX_64_TYPE Array2Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_2D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector2D_64
#define MIN_COMPLEX_64_SIZE 2
#define MIN_COMPLEX_64_TYPE Array2Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector2D_64


#define MAX_COMPLEX_32_SIZE 2
#define MAX_COMPLEX_32_TYPE Array2Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_2D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector2D_32
#define IDEAL_COMPLEX_32_SIZE 2
#define IDEAL_COMPLEX_32_TYPE Array2Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_2D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector2D_32
#define MIN_COMPLEX_32_SIZE 2
#define MIN_COMPLEX_32_TYPE Array2Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector2D_32


// Fallback when no SIMD feature macro is defined: every tier has size 1.
#elif !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS


#define MAX_COMPLEX_64_SIZE 1
#define MAX_COMPLEX_64_TYPE Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_1D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector1D_64
#define IDEAL_COMPLEX_64_SIZE 1
#define IDEAL_COMPLEX_64_TYPE Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_1D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector1D_64
#define MIN_COMPLEX_64_SIZE 1
#define MIN_COMPLEX_64_TYPE Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_1D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector1D_64

#define MAX_COMPLEX_32_SIZE 1
#define MAX_COMPLEX_32_TYPE Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_1D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector1D_32
#define IDEAL_COMPLEX_32_SIZE 1
#define IDEAL_COMPLEX_32_TYPE Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_1D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector1D_32
#define MIN_COMPLEX_32_SIZE 1
#define MIN_COMPLEX_32_TYPE Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_1D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector1D_32


#endif

#include "amlcomplexhead.h"

class AML_PREFIX(Array2Complex64) {
Expand Down Expand Up @@ -12117,3 +11963,158 @@ AML_FUNCTION AML_PREFIX(ComplexN) operator/(const AmlNumber &lhs, const AML_PREF
#endif

#endif //MATH_LIB_A_MATH_LIB_H


// Complex-array configuration macros.
//
// Exactly one branch of the chain below is taken, chosen by the SIMD feature
// macro that is defined (USE_AVX512, USE_AVX, USE_SSE / USE_NEON /
// USE_WASM_SIMD, AML_USE_ARRAY_STRICT, or none of them). Each branch defines,
// for both the 64-bit and the 32-bit complex family and for each of the
// MAX / IDEAL / MIN tiers:
//   <TIER>_COMPLEX_<BITS>_SIZE         element count of the array type
//   <TIER>_COMPLEX_<BITS>_TYPE         name of the complex array type
//   <TIER>_COMPLEX_<BITS>_MASK_TYPE    name of the matching mask type
//   <TIER>_COMPLEX_<BITS>_VECTOR_TYPE  name of the matching vector type
// COMPLEX_DEFINITIONS marks that a selection has been made.

#if defined(USE_AVX512) && !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS

#define MAX_COMPLEX_64_SIZE 8
#define MAX_COMPLEX_64_TYPE Array8Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector8D_64
#define IDEAL_COMPLEX_64_SIZE 8
#define IDEAL_COMPLEX_64_TYPE Array8Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_8D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector8D_64
#define MIN_COMPLEX_64_SIZE 4
#define MIN_COMPLEX_64_TYPE Array4Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_4D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector4D_64

#define MAX_COMPLEX_32_SIZE 8
#define MAX_COMPLEX_32_TYPE Array8Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define IDEAL_COMPLEX_32_SIZE 8
#define IDEAL_COMPLEX_32_TYPE Array8Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_8D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define MIN_COMPLEX_32_SIZE 4
#define MIN_COMPLEX_32_TYPE Array4Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_4D
// 32-bit family: use the _32 vector type, as the USE_AVX branch does.
#define MIN_COMPLEX_32_VECTOR_TYPE Vector4D_32


#elif defined(USE_AVX) && !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS

#define MAX_COMPLEX_64_SIZE 8
#define MAX_COMPLEX_64_TYPE Array8Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector8D_64
#define IDEAL_COMPLEX_64_SIZE 4
// Macros, not typedefs, so these three names behave like the same names in
// every other branch of this chain.
#define IDEAL_COMPLEX_64_TYPE Array4Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_4D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector4D_64
#define MIN_COMPLEX_64_SIZE 2
#define MIN_COMPLEX_64_TYPE Array2Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector2D_64

#define MAX_COMPLEX_32_SIZE 8
#define MAX_COMPLEX_32_TYPE Array8Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define IDEAL_COMPLEX_32_SIZE 8
#define IDEAL_COMPLEX_32_TYPE Array8Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_8D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define MIN_COMPLEX_32_SIZE 4
#define MIN_COMPLEX_32_TYPE Array4Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_4D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector4D_32


// The three feature tests are grouped explicitly: `&&` binds tighter than
// `||`, so without the parentheses the COMPLEX_DEFINITIONS / USE_CUDA guards
// would apply to USE_WASM_SIMD only.
// NOTE(review): builds that define USE_CUDA together with USE_SSE or USE_NEON
// no longer enter this branch -- confirm the CUDA path supplies its own
// definitions.
#elif (defined(USE_SSE) || defined(USE_NEON) || defined(USE_WASM_SIMD)) && !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS

#define MAX_COMPLEX_64_SIZE 4
#define MAX_COMPLEX_64_TYPE Array4Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_4D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector4D_64
#define IDEAL_COMPLEX_64_SIZE 2
#define IDEAL_COMPLEX_64_TYPE Array2Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_2D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector2D_64
#define MIN_COMPLEX_64_SIZE 1
#define MIN_COMPLEX_64_TYPE Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_1D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector1D_64


#define MAX_COMPLEX_32_SIZE 8
#define MAX_COMPLEX_32_TYPE Array8Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_8D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector8D_32
#define IDEAL_COMPLEX_32_SIZE 4
#define IDEAL_COMPLEX_32_TYPE Array4Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_4D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector4D_32
#define MIN_COMPLEX_32_SIZE 2
#define MIN_COMPLEX_32_TYPE Array2Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector2D_32

// NOTE(review): unlike every other branch, this one neither tests nor defines
// COMPLEX_DEFINITIONS and does not look at USE_CUDA -- confirm that is
// intended.
#elif defined(AML_USE_ARRAY_STRICT)

#define MAX_COMPLEX_64_SIZE 2
#define MAX_COMPLEX_64_TYPE Array2Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_2D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector2D_64
#define IDEAL_COMPLEX_64_SIZE 2
#define IDEAL_COMPLEX_64_TYPE Array2Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_2D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector2D_64
#define MIN_COMPLEX_64_SIZE 2
#define MIN_COMPLEX_64_TYPE Array2Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector2D_64


#define MAX_COMPLEX_32_SIZE 2
#define MAX_COMPLEX_32_TYPE Array2Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_2D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector2D_32
#define IDEAL_COMPLEX_32_SIZE 2
#define IDEAL_COMPLEX_32_TYPE Array2Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_2D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector2D_32
#define MIN_COMPLEX_32_SIZE 2
#define MIN_COMPLEX_32_TYPE Array2Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_2D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector2D_32


// Fallback when no SIMD feature macro is defined: every tier has size 1.
#elif !defined(COMPLEX_DEFINITIONS) && !defined(USE_CUDA)
#define COMPLEX_DEFINITIONS


#define MAX_COMPLEX_64_SIZE 1
#define MAX_COMPLEX_64_TYPE Complex64
#define MAX_COMPLEX_64_MASK_TYPE VectorU8_1D
#define MAX_COMPLEX_64_VECTOR_TYPE Vector1D_64
#define IDEAL_COMPLEX_64_SIZE 1
#define IDEAL_COMPLEX_64_TYPE Complex64
#define IDEAL_COMPLEX_64_MASK_TYPE VectorU8_1D
#define IDEAL_COMPLEX_64_VECTOR_TYPE Vector1D_64
#define MIN_COMPLEX_64_SIZE 1
#define MIN_COMPLEX_64_TYPE Complex64
#define MIN_COMPLEX_64_MASK_TYPE VectorU8_1D
#define MIN_COMPLEX_64_VECTOR_TYPE Vector1D_64

#define MAX_COMPLEX_32_SIZE 1
#define MAX_COMPLEX_32_TYPE Complex32
#define MAX_COMPLEX_32_MASK_TYPE VectorU8_1D
#define MAX_COMPLEX_32_VECTOR_TYPE Vector1D_32
#define IDEAL_COMPLEX_32_SIZE 1
#define IDEAL_COMPLEX_32_TYPE Complex32
#define IDEAL_COMPLEX_32_MASK_TYPE VectorU8_1D
#define IDEAL_COMPLEX_32_VECTOR_TYPE Vector1D_32
#define MIN_COMPLEX_32_SIZE 1
#define MIN_COMPLEX_32_TYPE Complex32
#define MIN_COMPLEX_32_MASK_TYPE VectorU8_1D
#define MIN_COMPLEX_32_VECTOR_TYPE Vector1D_32


#endif

4 changes: 4 additions & 0 deletions amlcomplex.h
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,10 @@ class AML_POINTER_NAME(Complex) : public AML_TYPE_NAME(Complex) {
return index;
}

// Explicit conversion to int: yields the value of `index`.
explicit AML_FUNCTION operator int() {
    return static_cast<int>(index);
}

AML_FUNCTION void operator=(const AML_TYPE_NAME(Complex) newVal) {
c.c[0] = newVal.c.c[0];
c.c[1] = newVal.c.c[1];
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
ARGS = -Ofast -march=native -flto
ARGS = -Ofast -march=native
#ARGS =

matrixMul: matrixBenchmark.cpp
g++ matrixBenchmark.cpp -lpthread -lbenchmark $(ARGS) -std=c++20 -Wall -Wextra -o matrix_bench_gcc
clang++ matrixBenchmark.cpp -lpthread -lbenchmark $(ARGS) -std=c++20 -Wall -Wextra -o matrix_bench_clang
/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icpc matrixBenchmark.cpp -lpthread -lbenchmark $(ARGS) -std=c++20 -Wall -Wextra -o matrix_bench_icc -no-multibyte-chars
/opt/nvidia/hpc_sdk/Linux_x86_64/2021/compilers/bin/pgc++ matrixBenchmark.cpp -tp=native -std=c++20 -O4 -o matrix_bench_pgc -lpthread -lbenchmark -DDEBUG
/opt/nvidia/hpc_sdk/Linux_x86_64/2021/compilers/bin/nvc++ matrixBenchmark.cpp -tp=native -std=c++20 -O4 -o matrix_bench_pgc -lpthread -lbenchmark -DDEBUG -fast
./matrix_bench_gcc
./matrix_bench_clang
./matrix_bench_icc
Expand All @@ -16,7 +16,7 @@ complex: complexBenchmark.cpp
g++ complexBenchmark.cpp -lpthread -lbenchmark $(ARGS) -std=c++20 -Wall -Wextra -o complex_bench_gcc
clang++ complexBenchmark.cpp -lpthread -lbenchmark $(ARGS) -std=c++20 -Wall -Wextra -o complex_bench_clang
/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icpc complexBenchmark.cpp -lpthread -lbenchmark $(ARGS) -std=c++20 -Wall -Wextra -o complex_bench_icc -no-multibyte-chars
/opt/nvidia/hpc_sdk/Linux_x86_64/2021/compilers/bin/pgc++ complexBenchmark.cpp -tp=native -std=c++20 -O4 -o complex_bench_pgc -lpthread -lbenchmark -DDEBUG
/opt/nvidia/hpc_sdk/Linux_x86_64/2021/compilers/bin/nvc++ complexBenchmark.cpp -tp=native -std=c++20 -O4 -o complex_bench_pgc -lpthread -lbenchmark -DDEBUG
./complex_bench_gcc
./complex_bench_clang
./complex_bench_icc
Expand Down
Loading

0 comments on commit ce36428

Please sign in to comment.