From fc44eb95dfdb09c458c8c046b8249d2520944886 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Fri, 11 Dec 2020 23:12:34 +0100 Subject: [PATCH] simd - array and variadic ctor --- libsrc/core/simd_avx.hpp | 6 +++ libsrc/core/simd_avx512.hpp | 4 +- libsrc/core/simd_generic.hpp | 101 ++++++++++++++++++++++++++++------- libsrc/core/simd_sse.hpp | 6 +++ 4 files changed, 96 insertions(+), 21 deletions(-) diff --git a/libsrc/core/simd_avx.hpp b/libsrc/core/simd_avx.hpp index 92ac6c04..c845ee2c 100644 --- a/libsrc/core/simd_avx.hpp +++ b/libsrc/core/simd_avx.hpp @@ -73,6 +73,9 @@ namespace ngcore SIMD (int64_t val) { data = _mm256_set1_epi64x(val); } SIMD (int64_t v0, int64_t v1, int64_t v2, int64_t v3) { data = _mm256_set_epi64x(v3,v2,v1,v0); } + SIMD (std::array a) + : data{_mm256_set_epi64x(a[3],a[2],a[1],a[0])} + {} // SIMD (SIMD v0, SIMD v1) : SIMD(v0[0], v0[1], v1[0], v1[1]) { ; } SIMD (__m256i _data) { data = _data; } @@ -123,6 +126,9 @@ namespace ngcore SIMD (double const * p) { data = _mm256_loadu_pd(p); } SIMD (double const * p, SIMD mask) { data = _mm256_maskload_pd(p, mask.Data()); } SIMD (__m256d _data) { data = _data; } + SIMD (std::array a) + : data{_mm256_set_pd(a[3],a[2],a[1],a[0])} + {} void Store (double * p) { _mm256_storeu_pd(p, data); } void Store (double * p, SIMD mask) { _mm256_maskstore_pd(p, mask.Data(), data); } diff --git a/libsrc/core/simd_avx512.hpp b/libsrc/core/simd_avx512.hpp index 4c461371..1f06e826 100644 --- a/libsrc/core/simd_avx512.hpp +++ b/libsrc/core/simd_avx512.hpp @@ -214,9 +214,9 @@ namespace ngcore NETGEN_INLINE SIMD HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4) { SIMD lo,hi; - tie(lo,hi) = Unpack(v1, v2); + std::tie(lo,hi) = Unpack(v1, v2); SIMD sum01 = lo+hi; - tie(lo,hi) = Unpack(v3, v4); + std::tie(lo,hi) = Unpack(v3, v4); SIMD sum23 = lo+hi; // sum01 b a b a b a b a // sum23 d c d c d c d c diff --git a/libsrc/core/simd_generic.hpp b/libsrc/core/simd_generic.hpp index 8cebe30b..027b0f14 100644 --- a/libsrc/core/simd_generic.hpp +++ b/libsrc/core/simd_generic.hpp @@ -15,7 +15,6 @@ namespace ngcore { - using namespace ngcore; constexpr int GetDefaultSIMDSize() { #if defined __AVX512F__ @@ -34,6 +33,23 @@ namespace ngcore class mask64; + //////////////////////////////////////////////////////////////////////////// + namespace detail { + template + auto array_range_impl(std::array const& arr, + size_t first, + std::index_sequence) + -> std::array { + return {arr[first + I]...}; + } + + template + auto array_range(std::array const& arr, size_t first) { + return array_range_impl(arr, first, std::make_index_sequence{}); + } + + } // namespace detail + //////////////////////////////////////////////////////////////////////////// // mask @@ -89,10 +105,19 @@ namespace ngcore SIMD (const SIMD &) = default; SIMD & operator= (const SIMD &) = default; SIMD (int64_t val) { data = val; } + SIMD (std::array arr) + : data{arr[0]} + {} int64_t operator[] (int i) const { return ((int64_t*)(&data))[i]; } auto Data() const { return data; } static SIMD FirstInt(int64_t n0=0) { return {n0}; } + template + int64_t Get() + { + static_assert(I==0); + return data; + } }; template @@ -114,6 +139,20 @@ namespace ngcore SIMD (int64_t val) : lo{val}, high{val} { ; } SIMD (SIMD lo_, SIMD high_) : lo(lo_), high(high_) { ; } + SIMD( std::array arr ) + : lo(detail::array_range(arr, 0)), + high(detail::array_range(arr, N1)) + {} + + template + SIMD(const T... vals) + : lo(detail::array_range(std::array{vals...}, 0)), + high(detail::array_range(std::array{vals...}, N1)) + { + static_assert(sizeof...(vals)==N, "wrong number of arguments"); + } + + template>::value, int>::type = 0> SIMD (const T & func) { @@ -137,6 +176,13 @@ namespace ngcore static SIMD FirstInt() { return { 0, 1, 2, 3 }; } */ static SIMD FirstInt(int64_t n0=0) { return {SIMD::FirstInt(n0), SIMD::FirstInt(n0+N1)}; } + template + int64_t Get() + { + static_assert(I>=0 && I(); + else return high.template Get(); + } }; @@ -158,6 +204,9 @@ namespace ngcore SIMD (size_t val) { data = val; } SIMD (double const * p) { data = *p; } SIMD (double const * p, SIMD mask) { data = mask.Data() ? *p : 0.0; } + SIMD (std::array arr) + : data{arr[0]} + {} template >::value,int>::type = 0> SIMD (const T & func) @@ -177,8 +226,15 @@ namespace ngcore double operator[] (int i) const { return ((double*)(&data))[i]; } double Data() const { return data; } + template + double Get() + { + static_assert(I==0); + return data; + } }; + template class SIMD { @@ -194,22 +250,6 @@ namespace ngcore SIMD (const SIMD &) = default; SIMD (SIMD lo_, SIMD hi_) : lo(lo_), high(hi_) { ; } - template> - SIMD (double v0, double v1, double v2, double v3) - { - if constexpr(N1==1) - { - lo = v0; - high = {v1,v2,v3}; - } - if constexpr(N1==2) - { - lo = {v0,v1}; - high = {v2,v3}; - - } - } - template >::value,int>::type = 0> SIMD (const T & func) { @@ -240,6 +280,23 @@ namespace ngcore SIMD (double const * p, SIMD mask) : lo{p, mask.Lo()}, high{p+N1, mask.Hi()} { } + SIMD (double * p) : lo{p}, high{p+N1} { ; } + SIMD (double * p, SIMD mask) + : lo{p, mask.Lo()}, high{p+N1, mask.Hi()} + { } + + SIMD( std::array arr ) + : lo(detail::array_range(arr, 0)), + high(detail::array_range(arr, N1)) + {} + + template + SIMD(const T... vals) + : lo(detail::array_range(std::array{vals...}, 0)), + high(detail::array_range(std::array{vals...}, N1)) + { + static_assert(sizeof...(vals)==N, "wrong number of arguments"); + } void Store (double * p) { lo.Store(p); high.Store(p+N1); } void Store (double * p, SIMD mask) @@ -261,6 +318,13 @@ namespace ngcore operator std::tuple () { return std::tuple((*this)[0], (*this)[1], (*this)[2], (*this)[3]); } + template + double Get() + { + static_assert(I>=0 && I(); + else return high.template Get(); + } }; @@ -582,10 +646,9 @@ namespace ngcore } } - - } + namespace std { // structured binding support diff --git a/libsrc/core/simd_sse.hpp b/libsrc/core/simd_sse.hpp index 6ea3f021..300ddc0b 100644 --- a/libsrc/core/simd_sse.hpp +++ b/libsrc/core/simd_sse.hpp @@ -48,6 +48,9 @@ namespace ngcore SIMD () {} SIMD (const SIMD &) = default; SIMD (int64_t v0, int64_t v1) { data = _mm_set_epi64x(v1,v0); } + SIMD (std::array arr) + : data{_mm_set_epi64x(arr[1],arr[0])} + {} SIMD & operator= (const SIMD &) = default; @@ -83,6 +86,9 @@ NETGEN_INLINE SIMD operator- (SIMD a, SIMD b) { SIMD () {} SIMD (const SIMD &) = default; SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); } + SIMD (std::array arr) + : data{_mm_set_pd(arr[1], arr[0])} + {} SIMD & operator= (const SIMD &) = default;