#ifndef FILE_MYSIMD #define FILE_MYSIMD /**************************************************************************/ /* File: mysimd.hpp */ /* Author: Joachim Schoeberl */ /* Date: 25. Mar. 16 */ /**************************************************************************/ #include #ifdef WIN32 inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); } inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); } inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); } inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); } inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); } inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; } inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; } inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; } inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; } inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; } inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; } inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); } inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); } inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); } inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); } inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); } inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; } inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; } inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; } inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; } inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; } inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; } #endif namespace netgen { template class SIMD; template struct has_call_operator { template static std::true_type check( decltype( sizeof(&C::operator() )) ) { return std::true_type(); } template static std::false_type check(...) { return std::false_type(); } typedef decltype( check(sizeof(char)) ) type; static constexpr type value = type(); }; #ifdef __AVX2__ template<> class alignas(32) SIMD { __m256d data; public: static constexpr int Size() { return 4; } SIMD () = default; SIMD (const SIMD &) = default; SIMD & operator= (const SIMD &) = default; SIMD (double val) { data = _mm256_set1_pd(val); } SIMD (__m256d adata) : data(adata) { ; } /* template SIMD (const T & val) { // SIMD_function(val, std::is_convertible>()); SIMD_function(val, has_call_operator::value); } */ /* template SIMD & operator= (const T & val) { // SIMD_function(val, std::is_convertible>()); SIMD_function(val, has_call_operator::value); return *this; } */ template void SIMD_function (const Function & func, std::true_type) { data = _mm256_set_pd(func(3), func(2), func(1), func(0)); } // not a function void SIMD_function (double const * p, std::false_type) { data = _mm256_loadu_pd(p); } void SIMD_function (double val, std::false_type) { data = _mm256_set1_pd(val); } void SIMD_function (__m256d _data, std::false_type) { data = _data; } inline double operator[] (int i) const { return ((double*)(&data))[i]; } inline __m256d Data() const { return data; } inline __m256d & Data() { return data; } }; inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } inline SIMD operator- (SIMD a) { return -a.Data(); } inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } inline SIMD operator* (double a, SIMD b) { return SIMD(a)*b; } inline SIMD operator* (SIMD b, double a) { return SIMD(a)*b; } inline SIMD operator+= (SIMD & a, SIMD b) { return a.Data()+=b.Data(); } inline SIMD operator-= (SIMD & a, SIMD b) { return a.Data()-=b.Data(); } inline SIMD operator*= (SIMD & a, SIMD b) { return a.Data()*=b.Data(); } inline SIMD operator/= (SIMD & a, SIMD b) { return a.Data()/=b.Data(); } using std::sqrt; using std::fabs; inline SIMD sqrt (SIMD a) { return _mm256_sqrt_pd(a.Data()); } inline SIMD fabs (SIMD a) { return _mm256_max_pd(a.Data(), -a.Data()); } inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } inline SIMD Trans (SIMD a) { return a; } inline SIMD IfPos (SIMD a, SIMD b, SIMD c) { auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS); return _mm256_blendv_pd(c.Data(), b.Data(), cp); } inline double HSum (SIMD sd) { __m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1)); return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv)); } #else template<> class SIMD { double data; public: static constexpr int Size() { return 1; } SIMD () = default; SIMD (const SIMD &) = default; SIMD & operator= (const SIMD &) = default; template SIMD (const T & val) { // SIMD_function(val, std::is_convertible>()); SIMD_function(val, has_call_operator::value); } template SIMD & operator= (const T & val) { // SIMD_function(val, std::is_convertible>()); SIMD_function(val, has_call_operator::value); return *this; } template void SIMD_function (const Function & func, std::true_type) { data = func(0); } // not a function void SIMD_function (double const * p, std::false_type) { data = *p; } void SIMD_function (double val, std::false_type) { data = val; } double operator[] (int i) const { return ((double*)(&data))[i]; } double Data() const { return data; } double & Data() { return data; } }; inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } inline SIMD operator- (SIMD a) { return -a.Data(); } inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } inline SIMD operator* (double a, SIMD b) { return SIMD(a)*b; } inline SIMD operator* (SIMD b, double a) { return SIMD(a)*b; } inline SIMD operator+= (SIMD & a, SIMD b) { return a.Data()+=b.Data(); } inline SIMD operator-= (SIMD & a, SIMD b) { return a.Data()-=b.Data(); } inline SIMD operator*= (SIMD & a, SIMD b) { return a.Data()*=b.Data(); } inline SIMD operator/= (SIMD & a, SIMD b) { return a.Data()/=b.Data(); } inline SIMD sqrt (SIMD a) { return std::sqrt(a.Data()); } inline SIMD fabs (SIMD a) { return std::fabs(a.Data()); } inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } inline SIMD Trans (SIMD a) { return a; } inline SIMD IfPos (SIMD a, SIMD b, SIMD c) { return (a.Data() > 0) ? b : c; } inline double HSum (SIMD sd) { return sd.Data(); } #endif template ostream & operator<< (ostream & ost, SIMD simd) { ost << simd[0]; for (int i = 1; i < simd.Size(); i++) ost << " " << simd[i]; return ost; } /* using std::exp; inline netgen::SIMD exp (netgen::SIMD a) { return netgen::SIMD([&](int i)->double { return exp(a[i]); } ); } using std::log; inline netgen::SIMD log (netgen::SIMD a) { return netgen::SIMD([&](int i)->double { return log(a[i]); } ); } using std::pow; inline netgen::SIMD pow (netgen::SIMD a, double x) { return netgen::SIMD([&](int i)->double { return pow(a[i],x); } ); } */ template class MultiSIMD { SIMD head; MultiSIMD tail; public: MultiSIMD () = default; MultiSIMD (const MultiSIMD & ) = default; MultiSIMD (T v) : head(v), tail(v) { ; } MultiSIMD (SIMD _head, MultiSIMD _tail) : head(_head), tail(_tail) { ; } SIMD Head() const { return head; } MultiSIMD Tail() const { return tail; } SIMD & Head() { return head; } MultiSIMD & Tail() { return tail; } template SIMD Get() const { return NR==0 ? head : tail.template Get(); } template SIMD & Get() { return NR==0 ? head : tail.template Get(); } }; template class MultiSIMD<2,T> { SIMD v0, v1; public: MultiSIMD () = default; MultiSIMD (const MultiSIMD & ) = default; MultiSIMD (T v) : v0(v), v1(v) { ; } MultiSIMD (SIMD _v0, SIMD _v1) : v0(_v0), v1(_v1) { ; } SIMD Head() const { return v0; } SIMD Tail() const { return v1; } SIMD & Head() { return v0; } SIMD & Tail() { return v1; } template SIMD Get() const { return NR==0 ? v0 : v1; } template SIMD & Get() { return NR==0 ? v0 : v1; } }; template inline MultiSIMD operator+ (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()+b.Head(), a.Tail()+b.Tail()); } template inline MultiSIMD operator+ (double a, MultiSIMD b) { return MultiSIMD (a+b.Head(), a+b.Tail()); } template inline MultiSIMD operator+ (MultiSIMD b, double a) { return MultiSIMD (a+b.Head(), a+b.Tail()); } template inline MultiSIMD operator- (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()-b.Head(), a.Tail()-b.Tail()); } template inline MultiSIMD operator- (double a, MultiSIMD b) { return MultiSIMD (a-b.Head(), a-b.Tail()); } template inline MultiSIMD operator- (MultiSIMD b, double a) { return MultiSIMD (b.Head()-a, b.Tail()-a); } template inline MultiSIMD operator- (MultiSIMD a) { return MultiSIMD (-a.Head(), -a.Tail()); } template inline MultiSIMD operator* (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()*b.Head(), a.Tail()*b.Tail()); } template inline MultiSIMD operator/ (MultiSIMD a, MultiSIMD b) { return MultiSIMD (a.Head()/b.Head(), a.Tail()/b.Tail()); } template inline MultiSIMD operator* (double a, MultiSIMD b) { return MultiSIMD ( a*b.Head(), a*b.Tail()); } template inline MultiSIMD operator* (MultiSIMD b, double a) { return MultiSIMD ( a*b.Head(), a*b.Tail()); } template inline MultiSIMD & operator+= (MultiSIMD & a, MultiSIMD b) { a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; } template inline MultiSIMD operator-= (MultiSIMD & a, double b) { a.Head()-=b; a.Tail()-=b; return a; } template inline MultiSIMD operator-= (MultiSIMD & a, MultiSIMD b) { a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; } template inline MultiSIMD & operator*= (MultiSIMD & a, MultiSIMD b) { a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; } template inline MultiSIMD & operator*= (MultiSIMD & a, double b) { a.Head()*=b; a.Tail()*=b; return a; } // inline MultiSIMD operator/= (MultiSIMD & a, MultiSIMD b) { return a.Data()/=b.Data(); } template ostream & operator<< (ostream & ost, MultiSIMD multi) { ost << multi.Head() << " " << multi.Tail(); return ost; } } #endif