simd - array and variadic ctor

This commit is contained in:
Matthias Hochsteger 2020-12-11 23:12:34 +01:00
parent 2d667a08dc
commit fc44eb95df
4 changed files with 96 additions and 21 deletions

View File

@ -73,6 +73,9 @@ namespace ngcore
SIMD (int64_t val) { data = _mm256_set1_epi64x(val); } SIMD (int64_t val) { data = _mm256_set1_epi64x(val); }
SIMD (int64_t v0, int64_t v1, int64_t v2, int64_t v3) { data = _mm256_set_epi64x(v3,v2,v1,v0); } SIMD (int64_t v0, int64_t v1, int64_t v2, int64_t v3) { data = _mm256_set_epi64x(v3,v2,v1,v0); }
SIMD (std::array<int64_t,4> a)
: data{_mm256_set_epi64x(a[3],a[2],a[1],a[0])}
{}
// SIMD (SIMD<double,2> v0, SIMD<double,2> v1) : SIMD(v0[0], v0[1], v1[0], v1[1]) { ; } // SIMD (SIMD<double,2> v0, SIMD<double,2> v1) : SIMD(v0[0], v0[1], v1[0], v1[1]) { ; }
SIMD (__m256i _data) { data = _data; } SIMD (__m256i _data) { data = _data; }
@ -123,6 +126,9 @@ namespace ngcore
SIMD (double const * p) { data = _mm256_loadu_pd(p); } SIMD (double const * p) { data = _mm256_loadu_pd(p); }
SIMD (double const * p, SIMD<mask64,4> mask) { data = _mm256_maskload_pd(p, mask.Data()); } SIMD (double const * p, SIMD<mask64,4> mask) { data = _mm256_maskload_pd(p, mask.Data()); }
SIMD (__m256d _data) { data = _data; } SIMD (__m256d _data) { data = _data; }
SIMD (std::array<double,4> a)
: data{_mm256_set_pd(a[3],a[2],a[1],a[0])}
{}
void Store (double * p) { _mm256_storeu_pd(p, data); } void Store (double * p) { _mm256_storeu_pd(p, data); }
void Store (double * p, SIMD<mask64,4> mask) { _mm256_maskstore_pd(p, mask.Data(), data); } void Store (double * p, SIMD<mask64,4> mask) { _mm256_maskstore_pd(p, mask.Data(), data); }

View File

@ -214,9 +214,9 @@ namespace ngcore
NETGEN_INLINE SIMD<double,4> HSum (SIMD<double,8> v1, SIMD<double,8> v2, SIMD<double,8> v3, SIMD<double,8> v4) NETGEN_INLINE SIMD<double,4> HSum (SIMD<double,8> v1, SIMD<double,8> v2, SIMD<double,8> v3, SIMD<double,8> v4)
{ {
SIMD<double> lo,hi; SIMD<double> lo,hi;
tie(lo,hi) = Unpack(v1, v2); std::tie(lo,hi) = Unpack(v1, v2);
SIMD<double> sum01 = lo+hi; SIMD<double> sum01 = lo+hi;
tie(lo,hi) = Unpack(v3, v4); std::tie(lo,hi) = Unpack(v3, v4);
SIMD<double> sum23 = lo+hi; SIMD<double> sum23 = lo+hi;
// sum01 b a b a b a b a // sum01 b a b a b a b a
// sum23 d c d c d c d c // sum23 d c d c d c d c

View File

@ -15,7 +15,6 @@
namespace ngcore namespace ngcore
{ {
using namespace ngcore;
constexpr int GetDefaultSIMDSize() { constexpr int GetDefaultSIMDSize() {
#if defined __AVX512F__ #if defined __AVX512F__
@ -34,6 +33,23 @@ namespace ngcore
class mask64; class mask64;
////////////////////////////////////////////////////////////////////////////
namespace detail {

// Implementation helper for array_range: the index pack I... is 0..S-1,
// and element k of the result is arr[first + k].
template <typename T, size_t N, size_t... I>
constexpr auto array_range_impl(std::array<T, N> const& arr,
                                size_t first,
                                std::index_sequence<I...>)
  -> std::array<T, sizeof...(I)> {
  return {arr[first + I]...};
}

// Returns a copy of the S consecutive elements arr[first] .. arr[first+S-1]
// as a std::array<T, S>.
//
// S is a compile-time length, first a (possibly run-time) start index.
// The caller must guarantee first + S <= N; only the part of that contract
// that is known at compile time (S <= N) can be checked here.
// Both helpers are constexpr so that slicing a constant array stays a
// constant expression.
template <size_t S, typename T, size_t N>
constexpr auto array_range(std::array<T, N> const& arr, size_t first) {
  static_assert(S <= N, "array_range: slice is longer than the source array");
  return array_range_impl(arr, first, std::make_index_sequence<S>{});
}

} // namespace detail
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// mask // mask
@ -89,10 +105,19 @@ namespace ngcore
SIMD (const SIMD &) = default; SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default; SIMD & operator= (const SIMD &) = default;
SIMD (int64_t val) { data = val; } SIMD (int64_t val) { data = val; }
SIMD (std::array<int64_t, 1> arr)
: data{arr[0]}
{}
int64_t operator[] (int i) const { return ((int64_t*)(&data))[i]; } int64_t operator[] (int i) const { return ((int64_t*)(&data))[i]; }
auto Data() const { return data; } auto Data() const { return data; }
static SIMD FirstInt(int64_t n0=0) { return {n0}; } static SIMD FirstInt(int64_t n0=0) { return {n0}; }
// Compile-time indexed element access for the single-lane case.
// Only I == 0 is valid; any other index is rejected at compile time with the
// same diagnostic text the multi-lane Get uses.
// Returns the stored value by copy.
// NOTE(review): this accessor does not modify the object but is not declared
// const, unlike operator[] and Data(); the multi-lane Get forwards to it, so
// both would have to change together.
template <int I>
int64_t Get()
{
  static_assert(I==0, "Index out of range");
  return data;
}
}; };
template<int N> template<int N>
@ -114,6 +139,20 @@ namespace ngcore
SIMD (int64_t val) : lo{val}, high{val} { ; } SIMD (int64_t val) : lo{val}, high{val} { ; }
SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; } SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; }
// Construct from N values stored in a std::array.
// lo is initialised from the N1 entries starting at index 0,
// high from the N2 entries starting at index N1.
SIMD( std::array<int64_t, N> arr )
  : lo(detail::array_range<N1>(arr, 0))
  , high(detail::array_range<N2>(arr, N1))
{ }
// Element-wise constructor: takes exactly N scalar arguments, converts each
// one to int64_t and forwards them, in argument order, to the std::array
// constructor (which splits them between lo and high).
//
// The template only takes part in overload resolution when the number of
// arguments equals N and every argument is convertible to int64_t. Without
// that constraint a by-value parameter pack is an exact match for *any*
// argument list, so it would be selected ahead of the other constructors and
// then fail in a static_assert instead of letting them be chosen.
// The explicit static_cast keeps the conversion out of the braced
// initialiser, where an implicit narrowing conversion is ill-formed.
// Delegating also means the temporary array is built only once.
template<typename ...T,
         typename std::enable_if<(sizeof...(T)==N
                                  && std::conjunction<std::is_convertible<T, int64_t>...>::value),
                                 int>::type = 0>
SIMD(const T... vals)
  : SIMD(std::array<int64_t, N>{static_cast<int64_t>(vals)...})
{ }
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int64_t(int)>>::value, int>::type = 0> template<typename T, typename std::enable_if<std::is_convertible<T, std::function<int64_t(int)>>::value, int>::type = 0>
SIMD (const T & func) SIMD (const T & func)
{ {
@ -137,6 +176,13 @@ namespace ngcore
static SIMD FirstInt() { return { 0, 1, 2, 3 }; } static SIMD FirstInt() { return { 0, 1, 2, 3 }; }
*/ */
static SIMD FirstInt(int64_t n0=0) { return {SIMD<int64_t,N1>::FirstInt(n0), SIMD<int64_t,N2>::FirstInt(n0+N1)}; } static SIMD FirstInt(int64_t n0=0) { return {SIMD<int64_t,N1>::FirstInt(n0), SIMD<int64_t,N2>::FirstInt(n0+N1)}; }
// Compile-time indexed element access.
// Indices below N1 are served by lo; all others are forwarded to high with
// the index shifted down by N1. An index outside [0, N) does not compile.
template <int I>
int64_t Get()
{
  static_assert(I>=0 && I<N, "Index out of range");
  if constexpr(I>=N1)
    return high.template Get<I-N1>();
  else
    return lo.template Get<I>();
}
}; };
@ -158,6 +204,9 @@ namespace ngcore
SIMD (size_t val) { data = val; } SIMD (size_t val) { data = val; }
SIMD (double const * p) { data = *p; } SIMD (double const * p) { data = *p; }
SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; } SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; }
SIMD (std::array<double, 1> arr)
: data{arr[0]}
{}
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0> template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
SIMD (const T & func) SIMD (const T & func)
@ -177,8 +226,15 @@ namespace ngcore
double operator[] (int i) const { return ((double*)(&data))[i]; } double operator[] (int i) const { return ((double*)(&data))[i]; }
double Data() const { return data; } double Data() const { return data; }
// Compile-time indexed element access for the single-lane case.
// Only I == 0 is valid; any other index is rejected at compile time with the
// same diagnostic text the multi-lane Get uses.
// Returns the stored value by copy.
// NOTE(review): this accessor does not modify the object but is not declared
// const, unlike operator[] and Data(); the multi-lane Get forwards to it, so
// both would have to change together.
template <int I>
double Get()
{
  static_assert(I==0, "Index out of range");
  return data;
}
}; };
template<int N> template<int N>
class SIMD<double, N> class SIMD<double, N>
{ {
@ -194,22 +250,6 @@ namespace ngcore
SIMD (const SIMD &) = default; SIMD (const SIMD &) = default;
SIMD (SIMD<double,N1> lo_, SIMD<double,N2> hi_) : lo(lo_), high(hi_) { ; } SIMD (SIMD<double,N1> lo_, SIMD<double,N2> hi_) : lo(lo_), high(hi_) { ; }
// Four-value constructor: distributes v0..v3 over the lo and high parts.
// lo and high are not named in a member-initialiser list, so both are
// default-constructed first and then assigned inside the body.
// NOTE(review): `std::enable_if<N==4>` names the trait class itself (there is
// no `::type`), and that class is a valid type for every N. As written the
// default template argument therefore does not remove this constructor from
// overload resolution when N != 4 - confirm whether that was the intent.
template<typename=std::enable_if<N==4>>
SIMD (double v0, double v1, double v2, double v3)
{
// N1 == 1: lo receives the first value, high the remaining three.
if constexpr(N1==1)
{
lo = v0;
high = {v1,v2,v3};
}
// N1 == 2: two values each for lo and high.
// For any other N1 neither branch is compiled in and the arguments are unused.
if constexpr(N1==2)
{
lo = {v0,v1};
high = {v2,v3};
}
}
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0> template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
SIMD (const T & func) SIMD (const T & func)
{ {
@ -240,6 +280,23 @@ namespace ngcore
SIMD (double const * p, SIMD<mask64,N> mask) SIMD (double const * p, SIMD<mask64,N> mask)
: lo{p, mask.Lo()}, high{p+N1, mask.Hi()} : lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
{ } { }
// Construction from a pointer to non-const double:
// lo is built from p, high from p+N1.
// NOTE(review): presumably provided so that `double*` arguments pick this
// overload rather than the variadic value constructor - confirm.
SIMD (double * p)
  : lo{p}
  , high{p+N1}
{ }
// Masked construction from a pointer to non-const double:
// lo is built from p with the low part of the mask (mask.Lo()),
// high from p+N1 with the high part (mask.Hi()).
SIMD (double * p, SIMD<mask64,N> mask)
  : lo{p, mask.Lo()}
  , high{p+N1, mask.Hi()}
{ }
// Construct from N values stored in a std::array.
// lo is initialised from the N1 entries starting at index 0,
// high from the N2 entries starting at index N1.
SIMD( std::array<double, N> arr )
  : lo(detail::array_range<N1>(arr, 0))
  , high(detail::array_range<N2>(arr, N1))
{ }
// Element-wise constructor: takes exactly N scalar arguments, converts each
// one to double and forwards them, in argument order, to the std::array
// constructor (which splits them between lo and high).
//
// The template only takes part in overload resolution when the number of
// arguments equals N and every argument is convertible to double. Without
// that constraint a by-value parameter pack is an exact match for *any*
// argument list (a single float, a pointer plus a mask, ...), so it would be
// selected ahead of the other constructors and then fail in a static_assert
// instead of letting them be chosen.
// The explicit static_cast keeps the conversion out of the braced
// initialiser: passing integer arguments there is a narrowing conversion,
// which is ill-formed inside braces.
// Delegating also means the temporary array is built only once.
template<typename ...T,
         typename std::enable_if<(sizeof...(T)==N
                                  && std::conjunction<std::is_convertible<T, double>...>::value),
                                 int>::type = 0>
SIMD(const T... vals)
  : SIMD(std::array<double, N>{static_cast<double>(vals)...})
{ }
void Store (double * p) { lo.Store(p); high.Store(p+N1); } void Store (double * p) { lo.Store(p); high.Store(p+N1); }
void Store (double * p, SIMD<mask64,N> mask) void Store (double * p, SIMD<mask64,N> mask)
@ -261,6 +318,13 @@ namespace ngcore
operator std::tuple<double&,double&,double&,double&> () operator std::tuple<double&,double&,double&,double&> ()
{ return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); } { return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
// Compile-time indexed element access.
// Indices below N1 are served by lo; all others are forwarded to high with
// the index shifted down by N1. An index outside [0, N) does not compile.
template <int I>
double Get()
{
  static_assert(I>=0 && I<N, "Index out of range");
  if constexpr(I>=N1)
    return high.template Get<I-N1>();
  else
    return lo.template Get<I>();
}
}; };
@ -582,10 +646,9 @@ namespace ngcore
} }
} }
} }
namespace std namespace std
{ {
// structured binding support // structured binding support

View File

@ -48,6 +48,9 @@ namespace ngcore
SIMD () {} SIMD () {}
SIMD (const SIMD &) = default; SIMD (const SIMD &) = default;
SIMD (int64_t v0, int64_t v1) { data = _mm_set_epi64x(v1,v0); } SIMD (int64_t v0, int64_t v1) { data = _mm_set_epi64x(v1,v0); }
SIMD (std::array<int64_t, 2> arr)
: data{_mm_set_epi64x(arr[1],arr[0])}
{}
SIMD & operator= (const SIMD &) = default; SIMD & operator= (const SIMD &) = default;
@ -83,6 +86,9 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
SIMD () {} SIMD () {}
SIMD (const SIMD &) = default; SIMD (const SIMD &) = default;
SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); } SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); }
SIMD (std::array<double, 2> arr)
: data{_mm_set_pd(arr[1], arr[0])}
{}
SIMD & operator= (const SIMD &) = default; SIMD & operator= (const SIMD &) = default;