Merge branch 'simd_fixes' into 'master'

Some fixes for odd SIMD sizes

See merge request jschoeberl/netgen!499
This commit is contained in:
Joachim Schöberl 2022-04-15 14:40:43 +00:00
commit 693135d52c
5 changed files with 59 additions and 9 deletions

View File

@ -42,6 +42,7 @@ namespace ngcore
SIMD (const SIMD &) = default; SIMD (const SIMD &) = default;
// SIMD (double v0, double v1) : data{v0,v1} { } // SIMD (double v0, double v1) : data{v0,v1} { }
SIMD (double v0, double v1) : data{vcombine_f64(float64x1_t{v0}, float64x1_t{v1})} { } SIMD (double v0, double v1) : data{vcombine_f64(float64x1_t{v0}, float64x1_t{v1})} { }
// Builds the 2-lane vector from two scalar SIMD values:
// v0 supplies lane 0 (low half), v1 supplies lane 1 (high half).
SIMD (SIMD<double,1> v0, SIMD<double,1> v1)
{
  const float64x1_t lo{v0.Data()};
  const float64x1_t hi{v1.Data()};
  data = vcombine_f64(lo, hi);
}
SIMD (std::array<double, 2> arr) : data{arr[0], arr[1]} { } SIMD (std::array<double, 2> arr) : data{arr[0], arr[1]} { }
SIMD & operator= (const SIMD &) = default; SIMD & operator= (const SIMD &) = default;

View File

@ -143,8 +143,6 @@ namespace ngcore
NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; } NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; }
// [[deprecated("don't write to individual elements of SIMD")]] // [[deprecated("don't write to individual elements of SIMD")]]
// NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; } // NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; }
template <int I>
double Get() const { return ((double*)(&data))[I]; }
NETGEN_INLINE __m256d Data() const { return data; } NETGEN_INLINE __m256d Data() const { return data; }
NETGEN_INLINE __m256d & Data() { return data; } NETGEN_INLINE __m256d & Data() { return data; }
@ -153,6 +151,13 @@ namespace ngcore
operator std::tuple<double&,double&,double&,double&> () operator std::tuple<double&,double&,double&,double&> ()
{ return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); } { return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
// Lane accessor with a compile-time index.
// An index outside [0,4) is rejected at compile time; the value itself
// is fetched through the const operator[] of this class.
template <int I>
double Get() const
{
  static_assert(0<=I && I<4, "Index out of range");
  const double lane = (*this)[I];
  return lane;
}
}; };
NETGEN_INLINE auto Unpack (SIMD<double,4> a, SIMD<double,4> b) NETGEN_INLINE auto Unpack (SIMD<double,4> a, SIMD<double,4> b)

View File

@ -92,6 +92,12 @@ namespace ngcore
SIMD (double const * p, SIMD<mask64,8> mask) SIMD (double const * p, SIMD<mask64,8> mask)
{ data = _mm512_mask_loadu_pd(_mm512_setzero_pd(), mask.Data(), p); } { data = _mm512_mask_loadu_pd(_mm512_setzero_pd(), mask.Data(), p); }
SIMD (__m512d _data) { data = _data; } SIMD (__m512d _data) { data = _data; }
// Concatenates two 4-lane vectors: v0 fills lanes 0..3, v1 fills lanes 4..7.
SIMD (SIMD<double,4> v0, SIMD<double,4> v1)
{
  // _mm512_set_pd lists the lanes from highest (7) down to lowest (0)
  data = _mm512_set_pd(v1[3], v1[2], v1[1], v1[0],
                       v0[3], v0[2], v0[1], v0[0]);
}
// Concatenates a 6-lane and a 2-lane vector: v0 fills lanes 0..5, v1 fills lanes 6..7.
SIMD (SIMD<double,6> v0, SIMD<double,2> v1)
{
  // _mm512_set_pd lists the lanes from highest (7) down to lowest (0)
  data = _mm512_set_pd(v1[1], v1[0],
                       v0[5], v0[4], v0[3], v0[2], v0[1], v0[0]);
}
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0> template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0>
SIMD (const T & func) SIMD (const T & func)
@ -129,6 +135,12 @@ namespace ngcore
NETGEN_INLINE __m512d Data() const { return data; } NETGEN_INLINE __m512d Data() const { return data; }
NETGEN_INLINE __m512d & Data() { return data; } NETGEN_INLINE __m512d & Data() { return data; }
// Lane accessor with a compile-time index.
// An index outside [0,8) is rejected at compile time; the value itself
// is fetched through the const operator[] of this class.
template <int I>
double Get() const
{
  static_assert(0<=I && I<8, "Index out of range");
  const double lane = (*this)[I];
  return lane;
}
}; };
NETGEN_INLINE SIMD<double,8> operator- (SIMD<double,8> a) { return -a.Data(); } NETGEN_INLINE SIMD<double,8> operator- (SIMD<double,8> a) { return -a.Data(); }

View File

@ -28,6 +28,28 @@ namespace ngcore
#endif #endif
} }
// Reports whether a SIMD width of n lanes is treated as natively supported
// for the current compile target. Width 1 is always native; widths 2, 4 and 8
// are enabled by the architecture macros tested below. Every other value
// (including 0 and negative numbers) yields false.
constexpr bool IsNativeSIMDSize(int n) {
  if(n==1) return true;
#if defined NETGEN_ARCH_AMD64 || defined __SSE__ || defined __aarch64__
  if(n==2) return true;
#endif
#if defined __AVX__
  if(n==4) return true;
#endif
#if defined __AVX512F__
  if(n==8) return true;
#endif
  return false;
}

// split n = k+l such that k is the largest natively supported simd size < n
//
// For n >= 2 the result is always in [1, n-1], because width 1 is native on
// every target. For n <= 1 no such split exists; the search is bounded at 1
// and returns 1 instead of decrementing k forever (which made the function
// unusable in constant expressions and overflowed at run time).
constexpr int GetLargestNativeSIMDPart(int n) {
  for(int k = n-1; k > 1; k--)
    if(IsNativeSIMDSize(k))
      return k;
  // width 1 is always native, so it is the fallback for every n
  return 1;
}
template <typename T, int N=GetDefaultSIMDSize()> class SIMD; template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
@ -67,9 +89,9 @@ namespace ngcore
template <int N> template <int N>
class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<mask64,N> class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<mask64,N>
{ {
static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2); static constexpr int N1 = GetLargestNativeSIMDPart(N);
static constexpr int N2 = N-N1; static constexpr int N2 = N-N1;
SIMD<mask64,N1> lo; SIMD<mask64,N1> lo;
@ -123,9 +145,9 @@ namespace ngcore
}; };
template<int N> template<int N>
class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<int64_t,N> class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int64_t,N>
{ {
static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2); static constexpr int N1 = GetLargestNativeSIMDPart(N);
static constexpr int N2 = N-N1; static constexpr int N2 = N-N1;
SIMD<int64_t,N1> lo; SIMD<int64_t,N1> lo;
@ -240,9 +262,9 @@ namespace ngcore
template<int N> template<int N>
class alignas(GetDefaultSIMDSize()*sizeof(double)) SIMD<double, N> class alignas(GetLargestNativeSIMDPart(N)*sizeof(double)) SIMD<double, N>
{ {
static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2); static constexpr int N1 = GetLargestNativeSIMDPart(N);
static constexpr int N2 = N-N1; static constexpr int N2 = N-N1;
SIMD<double, N1> lo; SIMD<double, N1> lo;
@ -543,7 +565,7 @@ namespace ngcore
template <int i, typename T, int N> template <int i, typename T, int N>
T get(SIMD<T,N> a) { return a[i]; } T get(SIMD<T,N> a) { return a.template Get<i>(); }
template <int NUM, typename FUNC> template <int NUM, typename FUNC>
NETGEN_INLINE void Iterate2 (FUNC f) NETGEN_INLINE void Iterate2 (FUNC f)

View File

@ -86,6 +86,9 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
SIMD () {} SIMD () {}
SIMD (const SIMD &) = default; SIMD (const SIMD &) = default;
SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); } SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); }
// Builds the 2-lane vector from two scalar SIMD values:
// v0 becomes lane 0 and v1 becomes lane 1.
// _mm_set_pd takes the high lane first, so v1 has to be passed before v0,
// exactly as the (double,double) and std::array constructors of this class do.
SIMD (SIMD<double,1> v0, SIMD<double,1> v1)
  : data{_mm_set_pd(v1.Data(), v0.Data())}
{ }
SIMD (std::array<double, 2> arr) SIMD (std::array<double, 2> arr)
: data{_mm_set_pd(arr[1], arr[0])} : data{_mm_set_pd(arr[1], arr[0])}
{} {}
@ -137,6 +140,13 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
NETGEN_INLINE __m128d Data() const { return data; } NETGEN_INLINE __m128d Data() const { return data; }
NETGEN_INLINE __m128d & Data() { return data; } NETGEN_INLINE __m128d & Data() { return data; }
template <int I>
double Get()
{
static_assert(I>=0 && I<2, "Index out of range");
return (*this)[I];
}
operator std::tuple<double&,double&> () operator std::tuple<double&,double&> ()
{ {
auto pdata = (double*)&data; auto pdata = (double*)&data;