diff --git a/libsrc/general/ngsimd.hpp b/libsrc/general/ngsimd.hpp index c12b5aff..7475190b 100644 --- a/libsrc/general/ngsimd.hpp +++ b/libsrc/general/ngsimd.hpp @@ -16,31 +16,31 @@ #ifdef WIN32 #ifndef AVX_OPERATORS_DEFINED #define AVX_OPERATORS_DEFINED -inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); } -inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); } -inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); } -inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); } -inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); } -inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; } -inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; } +NG_INLINE __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); } +NG_INLINE __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); } +NG_INLINE __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); } +NG_INLINE __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); } +NG_INLINE __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); } +NG_INLINE __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; } +NG_INLINE __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; } -inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; } -inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; } -inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; } -inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; } +NG_INLINE __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; } +NG_INLINE __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; } +NG_INLINE __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; } +NG_INLINE __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; } -inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); } -inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); } -inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); } -inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); } -inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); } -inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; } -inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; } +NG_INLINE __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); } +NG_INLINE __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); } +NG_INLINE __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); } +NG_INLINE __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); } +NG_INLINE __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); } +NG_INLINE __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; } +NG_INLINE __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; } -inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; } -inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; } -inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; } -inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; } +NG_INLINE __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; } +NG_INLINE __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; } +NG_INLINE __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; } +NG_INLINE __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; } #endif #endif @@ -71,27 +71,27 @@ namespace ngsimd template // a*b+c - inline auto FMA(T1 a, T2 b, T3 c) + NG_INLINE auto FMA(T1 a, T2 b, T3 c) { return a*b+c; } template::value, int>::type = 0> - inline SIMD operator+ (T a, SIMD b) { return SIMD(a) + b; } + NG_INLINE SIMD operator+ (T a, SIMD b) { return SIMD(a) + b; } template::value, int>::type = 0> - inline SIMD operator- (T a, SIMD b) { return SIMD(a) - b; } + NG_INLINE SIMD operator- (T a, SIMD b) { return SIMD(a) - b; } template::value, int>::type = 0> - inline SIMD operator* (T a, SIMD b) { return SIMD(a) * b; } + NG_INLINE SIMD operator* (T a, SIMD b) { return SIMD(a) * b; } template::value, int>::type = 0> - inline SIMD operator/ (T a, SIMD b) { return SIMD(a) / b; } + NG_INLINE SIMD operator/ (T a, SIMD b) { return SIMD(a) / b; } template::value, int>::type = 0> - inline SIMD operator+ (SIMD a, T b) { return a + SIMD(b); } + NG_INLINE SIMD operator+ (SIMD a, T b) { return a + SIMD(b); } template::value, int>::type = 0> - inline SIMD operator- (SIMD a, T b) { return a - SIMD(b); } + NG_INLINE SIMD operator- (SIMD a, T b) { return a - SIMD(b); } template::value, int>::type = 0> - inline SIMD operator* (SIMD a, T b) { return a * SIMD(b); } + NG_INLINE SIMD operator* (SIMD a, T b) { return a * SIMD(b); } template::value, int>::type = 0> - inline SIMD operator/ (SIMD a, T b) { return a / SIMD(b); } + NG_INLINE SIMD operator/ (SIMD a, T b) { return a / SIMD(b); } #ifdef __AVX__ @@ -141,43 +141,43 @@ using std::fabs; }; using std::exp; - template inline SIMD exp (SIMD a) + template NG_INLINE SIMD exp (SIMD a) { return SIMD([&](int i)->double { return exp(a[i]); } ); } using std::log; - template inline SIMD log (SIMD a) + template NG_INLINE SIMD log (SIMD a) { return SIMD([&](int i)->double { return log(a[i]); } ); } using std::pow; - template inline SIMD pow (SIMD a, double x) + template NG_INLINE SIMD pow (SIMD a, double x) { return SIMD([&](int i)->double { return pow(a[i],x); } ); } using std::sin; - template inline SIMD sin (SIMD a) + template NG_INLINE SIMD sin (SIMD a) { return SIMD([&](int i)->double { return sin(a[i]); } ); } using std::cos; - template inline SIMD cos (SIMD a) + template NG_INLINE SIMD cos (SIMD a) { return SIMD([&](int i)->double { return cos(a[i]); } ); } using std::tan; - template inline SIMD tan (SIMD a) + template NG_INLINE SIMD tan (SIMD a) { return SIMD([&](int i)->double { return tan(a[i]); } ); } using std::atan; - template inline SIMD atan (SIMD a) + template NG_INLINE SIMD atan (SIMD a) { return SIMD([&](int i)->double { return atan(a[i]); } ); } @@ -216,39 +216,39 @@ using std::fabs; data = *p; } - inline operator double() const { return data; } - inline double operator[] (int i) const { return ((double*)(&data))[i]; } - inline double Data() const { return data; } - inline double & Data() { return data; } + NG_INLINE operator double() const { return data; } + NG_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; } + NG_INLINE double Data() const { return data; } + NG_INLINE double & Data() { return data; } - inline SIMD &operator+= (SIMD b) { data+=b.Data(); return *this; } - inline SIMD &operator-= (SIMD b) { data-=b.Data(); return *this; } - inline SIMD &operator*= (SIMD b) { data*=b.Data(); return *this; } - inline SIMD &operator/= (SIMD b) { data/=b.Data(); return *this; } + NG_INLINE SIMD &operator+= (SIMD b) { data+=b.Data(); return *this; } + NG_INLINE SIMD &operator-= (SIMD b) { data-=b.Data(); return *this; } + NG_INLINE SIMD &operator*= (SIMD b) { data*=b.Data(); return *this; } + NG_INLINE SIMD &operator/= (SIMD b) { data/=b.Data(); return *this; } }; - inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } - inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } - inline SIMD operator- (SIMD a) { return -a.Data(); } - inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } - inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } + NG_INLINE SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } + NG_INLINE SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } + NG_INLINE SIMD operator- (SIMD a) { return -a.Data(); } + NG_INLINE SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } + NG_INLINE SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } - inline SIMD sqrt (SIMD a) { return std::sqrt(a.Data()); } - inline SIMD fabs (SIMD a) { return std::fabs(a.Data()); } - inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } - inline SIMD Trans (SIMD a) { return a; } - inline SIMD IfPos (SIMD a, SIMD b, SIMD c) + NG_INLINE SIMD sqrt (SIMD a) { return std::sqrt(a.Data()); } + NG_INLINE SIMD fabs (SIMD a) { return std::fabs(a.Data()); } + NG_INLINE SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } + NG_INLINE SIMD Trans (SIMD a) { return a; } + NG_INLINE SIMD IfPos (SIMD a, SIMD b, SIMD c) { return (a.Data() > 0) ? b : c; } - inline double HSum (SIMD sd) + NG_INLINE double HSum (SIMD sd) { return sd.Data(); } - inline auto HSum (SIMD sd1, SIMD sd2) + NG_INLINE auto HSum (SIMD sd1, SIMD sd2) { return std::make_tuple(sd1.Data(), sd2.Data()); } @@ -291,48 +291,48 @@ using std::fabs; data = _mm256_loadu_pd(p); } - inline operator __m256d() const { return data; } - inline double operator[] (int i) const { return ((double*)(&data))[i]; } - inline __m256d Data() const { return data; } - inline __m256d & Data() { return data; } + NG_INLINE operator __m256d() const { return data; } + NG_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; } + NG_INLINE __m256d Data() const { return data; } + NG_INLINE __m256d & Data() { return data; } - inline SIMD &operator+= (SIMD b) { data+=b.Data(); return *this; } - inline SIMD &operator-= (SIMD b) { data-=b.Data(); return *this; } - inline SIMD &operator*= (SIMD b) { data*=b.Data(); return *this; } - inline SIMD &operator/= (SIMD b) { data/=b.Data(); return *this; } + NG_INLINE SIMD &operator+= (SIMD b) { data+=b.Data(); return *this; } + NG_INLINE SIMD &operator-= (SIMD b) { data-=b.Data(); return *this; } + NG_INLINE SIMD &operator*= (SIMD b) { data*=b.Data(); return *this; } + NG_INLINE SIMD &operator/= (SIMD b) { data/=b.Data(); return *this; } }; - inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } - inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } - inline SIMD operator- (SIMD a) { return -a.Data(); } - inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } - inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } + NG_INLINE SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } + NG_INLINE SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } + NG_INLINE SIMD operator- (SIMD a) { return -a.Data(); } + NG_INLINE SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } + NG_INLINE SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } - inline SIMD sqrt (SIMD a) { return _mm256_sqrt_pd(a.Data()); } - inline SIMD fabs (SIMD a) { return _mm256_max_pd(a.Data(), -a.Data()); } - inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } - inline SIMD Trans (SIMD a) { return a; } - inline SIMD IfPos (SIMD a, SIMD b, SIMD c) + NG_INLINE SIMD sqrt (SIMD a) { return _mm256_sqrt_pd(a.Data()); } + NG_INLINE SIMD fabs (SIMD a) { return _mm256_max_pd(a.Data(), -a.Data()); } + NG_INLINE SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } + NG_INLINE SIMD Trans (SIMD a) { return a; } + NG_INLINE SIMD IfPos (SIMD a, SIMD b, SIMD c) { auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS); return _mm256_blendv_pd(c.Data(), b.Data(), cp); } - inline double HSum (SIMD sd) + NG_INLINE double HSum (SIMD sd) { __m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1)); return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv)); } - inline auto HSum (SIMD sd1, SIMD sd2) + NG_INLINE auto HSum (SIMD sd1, SIMD sd2) { __m256d hv = _mm256_hadd_pd(sd1.Data(), sd2.Data()); __m128d hv2 = _mm_add_pd (_mm256_extractf128_pd(hv,0), _mm256_extractf128_pd(hv,1)); return std::make_tuple(_mm_cvtsd_f64 (hv2), _mm_cvtsd_f64(_mm_shuffle_pd (hv2, hv2, 3))); } - inline SIMD HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4) + NG_INLINE SIMD HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4) { __m256d hsum1 = _mm256_hadd_pd (v1.Data(), v2.Data()); __m256d hsum2 = _mm256_hadd_pd (v3.Data(), v4.Data()); @@ -382,54 +382,54 @@ using std::fabs; data = _mm512_loadu_pd(p); } - inline operator __m512d() const { return data; } - inline double operator[] (int i) const { return ((double*)(&data))[i]; } - inline __m512d Data() const { return data; } - inline __m512d & Data() { return data; } + NG_INLINE operator __m512d() const { return data; } + NG_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; } + NG_INLINE __m512d Data() const { return data; } + NG_INLINE __m512d & Data() { return data; } - inline SIMD &operator+= (SIMD b) { data+=b.Data(); return *this; } - inline SIMD &operator-= (SIMD b) { data-=b.Data(); return *this; } - inline SIMD &operator*= (SIMD b) { data*=b.Data(); return *this; } - inline SIMD &operator/= (SIMD b) { data/=b.Data(); return *this; } + NG_INLINE SIMD &operator+= (SIMD b) { data+=b.Data(); return *this; } + NG_INLINE SIMD &operator-= (SIMD b) { data-=b.Data(); return *this; } + NG_INLINE SIMD &operator*= (SIMD b) { data*=b.Data(); return *this; } + NG_INLINE SIMD &operator/= (SIMD b) { data/=b.Data(); return *this; } }; - inline SIMD operator- (SIMD a) { return _mm512_sub_pd(_mm512_setzero_pd(), a.Data()); } + NG_INLINE SIMD operator- (SIMD a) { return _mm512_sub_pd(_mm512_setzero_pd(), a.Data()); } - inline SIMD operator+ (SIMD a, SIMD b) { return _mm512_add_pd(a.Data(),b.Data()); } - inline SIMD operator- (SIMD a, SIMD b) { return _mm512_sub_pd(a.Data(),b.Data()); } - inline SIMD operator* (SIMD a, SIMD b) { return _mm512_mul_pd(a.Data(),b.Data()); } - inline SIMD operator/ (SIMD a, SIMD b) { return _mm512_div_pd(a.Data(),b.Data()); } + NG_INLINE SIMD operator+ (SIMD a, SIMD b) { return _mm512_add_pd(a.Data(),b.Data()); } + NG_INLINE SIMD operator- (SIMD a, SIMD b) { return _mm512_sub_pd(a.Data(),b.Data()); } + NG_INLINE SIMD operator* (SIMD a, SIMD b) { return _mm512_mul_pd(a.Data(),b.Data()); } + NG_INLINE SIMD operator/ (SIMD a, SIMD b) { return _mm512_div_pd(a.Data(),b.Data()); } - inline SIMD sqrt (SIMD a) { return _mm512_sqrt_pd(a.Data()); } - inline SIMD fabs (SIMD a) { return _mm512_max_pd(a.Data(), -a.Data()); } - inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } - inline SIMD Trans (SIMD a) { return a; } - inline SIMD IfPos (SIMD a, SIMD b, SIMD c) + NG_INLINE SIMD sqrt (SIMD a) { return _mm512_sqrt_pd(a.Data()); } + NG_INLINE SIMD fabs (SIMD a) { return _mm512_max_pd(a.Data(), -a.Data()); } + NG_INLINE SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } + NG_INLINE SIMD Trans (SIMD a) { return a; } + NG_INLINE SIMD IfPos (SIMD a, SIMD b, SIMD c) { auto cp = _mm512_cmp_pd_mask (a.Data(), _mm512_setzero_pd(), _MM_CMPINT_GT); return _mm512_mask_blend_pd(cp, c.Data(), b.Data()); } - template<> inline auto FMA (SIMD a, SIMD b, SIMD c) + template<> NG_INLINE auto FMA (SIMD a, SIMD b, SIMD c) { return _mm512_fmadd_pd (a.Data(), b.Data(), c.Data()); } - inline double HSum (SIMD sd) + NG_INLINE double HSum (SIMD sd) { SIMD low = _mm512_extractf64x4_pd(sd.Data(),0); SIMD high = _mm512_extractf64x4_pd(sd.Data(),1); return HSum(low)+HSum(high); } - inline auto HSum (SIMD sd1, SIMD sd2) + NG_INLINE auto HSum (SIMD sd1, SIMD sd2) { return std::make_tuple(HSum(sd1), HSum(sd2)); } - inline SIMD HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4) + NG_INLINE SIMD HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4) { SIMD high1 = _mm512_extractf64x4_pd(v1.Data(),1); SIMD high2 = _mm512_extractf64x4_pd(v2.Data(),1); @@ -490,48 +490,48 @@ using std::fabs; SIMD & Get() { return NR==0 ? v0 : v1; } }; - template inline MultiSIMD operator+ (MultiSIMD a, MultiSIMD b) + template NG_INLINE MultiSIMD operator+ (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()+b.Head(), a.Tail()+b.Tail()); } - template inline MultiSIMD operator+ (double a, MultiSIMD b) + template NG_INLINE MultiSIMD operator+ (double a, MultiSIMD b) { return MultiSIMD (a+b.Head(), a+b.Tail()); } - template inline MultiSIMD operator+ (MultiSIMD b, double a) + template NG_INLINE MultiSIMD operator+ (MultiSIMD b, double a) { return MultiSIMD (a+b.Head(), a+b.Tail()); } - template inline MultiSIMD operator- (MultiSIMD a, MultiSIMD b) + template NG_INLINE MultiSIMD operator- (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()-b.Head(), a.Tail()-b.Tail()); } - template inline MultiSIMD operator- (double a, MultiSIMD b) + template NG_INLINE MultiSIMD operator- (double a, MultiSIMD b) { return MultiSIMD (a-b.Head(), a-b.Tail()); } - template inline MultiSIMD operator- (MultiSIMD b, double a) + template NG_INLINE MultiSIMD operator- (MultiSIMD b, double a) { return MultiSIMD (b.Head()-a, b.Tail()-a); } - template inline MultiSIMD operator- (MultiSIMD a) + template NG_INLINE MultiSIMD operator- (MultiSIMD a) { return MultiSIMD (-a.Head(), -a.Tail()); } - template inline MultiSIMD operator* (MultiSIMD a, MultiSIMD b) + template NG_INLINE MultiSIMD operator* (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()*b.Head(), a.Tail()*b.Tail()); } - template inline MultiSIMD operator/ (MultiSIMD a, MultiSIMD b) + template NG_INLINE MultiSIMD operator/ (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()/b.Head(), a.Tail()/b.Tail()); } - template inline MultiSIMD operator* (double a, MultiSIMD b) + template NG_INLINE MultiSIMD operator* (double a, MultiSIMD b) { return MultiSIMD ( a*b.Head(), a*b.Tail()); } - template inline MultiSIMD operator* (MultiSIMD b, double a) + template NG_INLINE MultiSIMD operator* (MultiSIMD b, double a) { return MultiSIMD ( a*b.Head(), a*b.Tail()); } - template inline MultiSIMD & operator+= (MultiSIMD & a, MultiSIMD b) + template NG_INLINE MultiSIMD & operator+= (MultiSIMD & a, MultiSIMD b) { a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; } - template inline MultiSIMD operator-= (MultiSIMD & a, double b) + template NG_INLINE MultiSIMD operator-= (MultiSIMD & a, double b) { a.Head()-=b; a.Tail()-=b; return a; } - template inline MultiSIMD operator-= (MultiSIMD & a, MultiSIMD b) + template NG_INLINE MultiSIMD operator-= (MultiSIMD & a, MultiSIMD b) { a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; } - template inline MultiSIMD & operator*= (MultiSIMD & a, MultiSIMD b) + template NG_INLINE MultiSIMD & operator*= (MultiSIMD & a, MultiSIMD b) { a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; } - template inline MultiSIMD & operator*= (MultiSIMD & a, double b) + template NG_INLINE MultiSIMD & operator*= (MultiSIMD & a, double b) { a.Head()*=b; a.Tail()*=b; return a; } - // inline MultiSIMD operator/= (MultiSIMD & a, MultiSIMD b) { return a.Data()/=b.Data(); } + // NG_INLINE MultiSIMD operator/= (MultiSIMD & a, MultiSIMD b) { return a.Data()/=b.Data(); } - inline SIMD HVSum (SIMD a) { return a; } + NG_INLINE SIMD HVSum (SIMD a) { return a; } template - inline SIMD HVSum (MultiSIMD a) { return a.Head() + HVSum(a.Tail()); } + NG_INLINE SIMD HVSum (MultiSIMD a) { return a.Head() + HVSum(a.Tail()); } - template inline double HSum (MultiSIMD a) { return HSum(HVSum(a)); } - template inline auto HSum (MultiSIMD a, MultiSIMD b) + template NG_INLINE double HSum (MultiSIMD a) { return HSum(HVSum(a)); } + template NG_INLINE auto HSum (MultiSIMD a, MultiSIMD b) { return HSum(HVSum(a), HVSum(b)); } template diff --git a/libsrc/include/mydefs.hpp b/libsrc/include/mydefs.hpp index bf3cb1e9..794e6521 100644 --- a/libsrc/include/mydefs.hpp +++ b/libsrc/include/mydefs.hpp @@ -36,6 +36,22 @@ #endif +#ifndef NG_INLINE +#ifdef __INTEL_COMPILER +#ifdef WIN32 +#define NG_INLINE __forceinline inline +#else +#define NG_INLINE __forceinline inline +#endif +#else +#ifdef __GNUC__ +#define NG_INLINE __attribute__ ((__always_inline__)) inline +#define VLA +#else +#define NG_INLINE inline +#endif +#endif +#endif // #define BASE0