From 949ead991f03e508180a958e26f2b53fd7a87a06 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Tue, 10 Oct 2017 18:15:28 +0200 Subject: [PATCH] Update ngsimd.hpp --- libsrc/general/ngsimd.hpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/libsrc/general/ngsimd.hpp b/libsrc/general/ngsimd.hpp index 7475190b..92bf608b 100644 --- a/libsrc/general/ngsimd.hpp +++ b/libsrc/general/ngsimd.hpp @@ -235,6 +235,8 @@ using std::fabs; NG_INLINE SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } NG_INLINE SIMD sqrt (SIMD a) { return std::sqrt(a.Data()); } + NG_INLINE SIMD floor (SIMD a) { return std::floor(a.Data()); } + NG_INLINE SIMD ceil (SIMD a) { return std::ceil(a.Data()); } NG_INLINE SIMD fabs (SIMD a) { return std::fabs(a.Data()); } NG_INLINE SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } NG_INLINE SIMD Trans (SIMD a) { return a; } @@ -253,6 +255,11 @@ using std::fabs; return std::make_tuple(sd1.Data(), sd2.Data()); } + NG_INLINE auto HSum (SIMD sd1, SIMD sd2, SIMD sd3, SIMD sd4) + { + return std::make_tuple(sd1.Data(), sd2.Data(), sd3.Data(), sd4.Data()); + } + ///////////////////////////////////////////////////////////////////////////// // AVX - Simd width 4 ///////////////////////////////////////////////////////////////////////////// @@ -293,9 +300,14 @@ using std::fabs; NG_INLINE operator __m256d() const { return data; } NG_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; } + NG_INLINE double& operator[] (int i) { return ((double*)(&data))[i]; } NG_INLINE __m256d Data() const { return data; } NG_INLINE __m256d & Data() { return data; } + NG_INLINE operator std::tuple () + { return std::tuple((*this)[0], (*this)[1], (*this)[2], (*this)[3]); } + + NG_INLINE SIMD &operator+= (SIMD b) { data+=b.Data(); return *this; } NG_INLINE SIMD &operator-= (SIMD b) { data-=b.Data(); return *this; } NG_INLINE SIMD &operator*= (SIMD b) { data*=b.Data(); return *this; } @@ -310,6 +322,8 @@ using std::fabs; NG_INLINE SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } NG_INLINE SIMD sqrt (SIMD a) { return _mm256_sqrt_pd(a.Data()); } + NG_INLINE SIMD floor (SIMD a) { return _mm256_floor_pd(a.Data()); } + NG_INLINE SIMD ceil (SIMD a) { return _mm256_ceil_pd(a.Data()); } NG_INLINE SIMD fabs (SIMD a) { return _mm256_max_pd(a.Data(), -a.Data()); } NG_INLINE SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } NG_INLINE SIMD Trans (SIMD a) { return a; } @@ -332,13 +346,13 @@ using std::fabs; return std::make_tuple(_mm_cvtsd_f64 (hv2), _mm_cvtsd_f64(_mm_shuffle_pd (hv2, hv2, 3))); } - NG_INLINE SIMD HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4) + NG_INLINE auto HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4) { __m256d hsum1 = _mm256_hadd_pd (v1.Data(), v2.Data()); __m256d hsum2 = _mm256_hadd_pd (v3.Data(), v4.Data()); __m256d hsum = _mm256_add_pd (_mm256_permute2f128_pd (hsum1, hsum2, 1+2*16), _mm256_blend_pd (hsum1, hsum2, 12)); - return hsum; + return SIMD(hsum); } #endif // __AVX__ @@ -402,6 +416,8 @@ using std::fabs; NG_INLINE SIMD operator/ (SIMD a, SIMD b) { return _mm512_div_pd(a.Data(),b.Data()); } NG_INLINE SIMD sqrt (SIMD a) { return _mm512_sqrt_pd(a.Data()); } + NG_INLINE SIMD floor (SIMD a) { return _mm512_floor_pd(a.Data()); } + NG_INLINE SIMD ceil (SIMD a) { return _mm512_ceil_pd(a.Data()); } NG_INLINE SIMD fabs (SIMD a) { return _mm512_max_pd(a.Data(), -a.Data()); } NG_INLINE SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } NG_INLINE SIMD Trans (SIMD a) { return a; } @@ -467,6 +483,8 @@ using std::fabs; SIMD Get() const { return NR==0 ? head : tail.template Get(); } template SIMD & Get() { return NR==0 ? head : tail.template Get(); } + auto MakeTuple() { return std::tuple_cat(std::tuple&> (head), tail.MakeTuple()); } + operator auto () { return MakeTuple(); } }; template @@ -488,6 +506,8 @@ using std::fabs; SIMD Get() const { return NR==0 ? v0 : v1; } template SIMD & Get() { return NR==0 ? v0 : v1; } + auto MakeTuple() { return std::tuple&, SIMD&> (v0, v1); } + operator auto () { return MakeTuple(); } }; template NG_INLINE MultiSIMD operator+ (MultiSIMD a, MultiSIMD b)