mirror of
https://github.com/NGSolve/netgen.git
synced 2024-12-25 05:20:34 +05:00
Some fixes for odd SIMD sizes
This commit is contained in:
parent
39cc7ae0a3
commit
e4ff37887b
@ -42,6 +42,7 @@ namespace ngcore
|
|||||||
SIMD (const SIMD &) = default;
|
SIMD (const SIMD &) = default;
|
||||||
// SIMD (double v0, double v1) : data{v0,v1} { }
|
// SIMD (double v0, double v1) : data{v0,v1} { }
|
||||||
SIMD (double v0, double v1) : data{vcombine_f64(float64x1_t{v0}, float64x1_t{v1})} { }
|
SIMD (double v0, double v1) : data{vcombine_f64(float64x1_t{v0}, float64x1_t{v1})} { }
|
||||||
|
// Builds a 2-lane vector out of two 1-lane values by combining them with
// vcombine_f64; v0 is passed as the first half and v1 as the second.
SIMD (SIMD<double,1> v0, SIMD<double,1> v1)
  : data{vcombine_f64(float64x1_t{v0.Data()},
                      float64x1_t{v1.Data()})}
{ }
|
||||||
SIMD (std::array<double, 2> arr) : data{arr[0], arr[1]} { }
|
SIMD (std::array<double, 2> arr) : data{arr[0], arr[1]} { }
|
||||||
|
|
||||||
SIMD & operator= (const SIMD &) = default;
|
SIMD & operator= (const SIMD &) = default;
|
||||||
|
@ -143,8 +143,6 @@ namespace ngcore
|
|||||||
NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; }
|
NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; }
|
||||||
// [[deprecated("don't write to individual elements of SIMD")]]
|
// [[deprecated("don't write to individual elements of SIMD")]]
|
||||||
// NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; }
|
// NETGEN_INLINE double & operator[] (int i) { return ((double*)(&data))[i]; }
|
||||||
template <int I>
|
|
||||||
double Get() const { return ((double*)(&data))[I]; }
|
|
||||||
NETGEN_INLINE __m256d Data() const { return data; }
|
NETGEN_INLINE __m256d Data() const { return data; }
|
||||||
NETGEN_INLINE __m256d & Data() { return data; }
|
NETGEN_INLINE __m256d & Data() { return data; }
|
||||||
|
|
||||||
@ -153,6 +151,13 @@ namespace ngcore
|
|||||||
|
|
||||||
operator std::tuple<double&,double&,double&,double&> ()
|
operator std::tuple<double&,double&,double&,double&> ()
|
||||||
{ return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
|
{ return std::tuple<double&,double&,double&,double&>((*this)[0], (*this)[1], (*this)[2], (*this)[3]); }
|
||||||
|
|
||||||
|
template <int I>
|
||||||
|
double Get() const
|
||||||
|
{
|
||||||
|
static_assert(I>=0 && I<4, "Index out of range");
|
||||||
|
return (*this)[I];
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
NETGEN_INLINE auto Unpack (SIMD<double,4> a, SIMD<double,4> b)
|
NETGEN_INLINE auto Unpack (SIMD<double,4> a, SIMD<double,4> b)
|
||||||
|
@ -92,6 +92,12 @@ namespace ngcore
|
|||||||
SIMD (double const * p, SIMD<mask64,8> mask)
|
SIMD (double const * p, SIMD<mask64,8> mask)
|
||||||
{ data = _mm512_mask_loadu_pd(_mm512_setzero_pd(), mask.Data(), p); }
|
{ data = _mm512_mask_loadu_pd(_mm512_setzero_pd(), mask.Data(), p); }
|
||||||
SIMD (__m512d _data) { data = _data; }
|
SIMD (__m512d _data) { data = _data; }
|
||||||
|
// Concatenates two 4-lane vectors into one 8-lane vector.
// _mm512_set_pd lists elements from the highest lane down to lane 0, so the
// lanes of v1 are given first and the lanes of v0 last: v0 fills lanes 0..3
// and v1 fills lanes 4..7.
SIMD (SIMD<double,4> v0, SIMD<double,4> v1)
{
  data = _mm512_set_pd(v1[3], v1[2], v1[1], v1[0],
                       v0[3], v0[2], v0[1], v0[0]);
}
|
||||||
|
// Concatenates a 6-lane and a 2-lane vector into one 8-lane vector.
// _mm512_set_pd lists elements from the highest lane down to lane 0, so the
// two lanes of v1 come first and the six lanes of v0 last: v0 fills lanes
// 0..5 and v1 fills lanes 6..7.
SIMD (SIMD<double,6> v0, SIMD<double,2> v1)
{
  data = _mm512_set_pd(v1[1], v1[0],
                       v0[5], v0[4], v0[3], v0[2], v0[1], v0[0]);
}
|
||||||
|
|
||||||
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0>
|
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0>
|
||||||
SIMD (const T & func)
|
SIMD (const T & func)
|
||||||
@ -129,6 +135,12 @@ namespace ngcore
|
|||||||
NETGEN_INLINE __m512d Data() const { return data; }
|
NETGEN_INLINE __m512d Data() const { return data; }
|
||||||
NETGEN_INLINE __m512d & Data() { return data; }
|
NETGEN_INLINE __m512d & Data() { return data; }
|
||||||
|
|
||||||
|
template <int I>
|
||||||
|
double Get() const
|
||||||
|
{
|
||||||
|
static_assert(I>=0 && I<8, "Index out of range");
|
||||||
|
return (*this)[I];
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
NETGEN_INLINE SIMD<double,8> operator- (SIMD<double,8> a) { return -a.Data(); }
|
NETGEN_INLINE SIMD<double,8> operator- (SIMD<double,8> a) { return -a.Data(); }
|
||||||
|
@ -28,6 +28,28 @@ namespace ngcore
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether a SIMD width of @p n lanes counts as native for the
/// current compilation target.
///
/// Width 1 is always accepted. Widths 2, 4 and 8 are accepted only when the
/// preprocessor feature macros tested below are defined. Every other value
/// (including 0 and negative numbers) yields false.
constexpr bool IsNativeSIMDSize(int n) {
  if(n==1) return true;
#if defined NETGEN_ARCH_AMD64 || defined __SSE__ || defined __aarch64__
  if(n==2) return true;
#endif
#if defined __AVX__
  if(n==4) return true;
#endif
#if defined __AVX512F__
  if(n==8) return true;
#endif
  return false;
}

/// Split n = k+l such that k is the largest natively supported simd size < n.
///
/// @param n total number of lanes to split.
/// @return the largest k with 1 <= k < n for which IsNativeSIMDSize(k) holds,
///         or 0 when no such k exists (n <= 1).
///
/// The search is bounded at k > 0: for n >= 2 it always stops at k == 1 at
/// the latest (width 1 is always native), and for n <= 1 it returns 0 rather
/// than decrementing k without end.
constexpr int GetLargestNativeSIMDPart(int n) {
  int k = n-1;
  while(k > 0 && !IsNativeSIMDSize(k))
    k--;
  return k > 0 ? k : 0;
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
|
template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
|
||||||
|
|
||||||
@ -67,9 +89,9 @@ namespace ngcore
|
|||||||
|
|
||||||
|
|
||||||
template <int N>
|
template <int N>
|
||||||
class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<mask64,N>
|
class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<mask64,N>
|
||||||
{
|
{
|
||||||
static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2);
|
static constexpr int N1 = GetLargestNativeSIMDPart(N);
|
||||||
static constexpr int N2 = N-N1;
|
static constexpr int N2 = N-N1;
|
||||||
|
|
||||||
SIMD<mask64,N1> lo;
|
SIMD<mask64,N1> lo;
|
||||||
@ -123,9 +145,9 @@ namespace ngcore
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<int N>
|
template<int N>
|
||||||
class alignas(GetDefaultSIMDSize()*sizeof(int64_t)) SIMD<int64_t,N>
|
class alignas(GetLargestNativeSIMDPart(N)*sizeof(int64_t)) SIMD<int64_t,N>
|
||||||
{
|
{
|
||||||
static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2);
|
static constexpr int N1 = GetLargestNativeSIMDPart(N);
|
||||||
static constexpr int N2 = N-N1;
|
static constexpr int N2 = N-N1;
|
||||||
|
|
||||||
SIMD<int64_t,N1> lo;
|
SIMD<int64_t,N1> lo;
|
||||||
@ -240,9 +262,9 @@ namespace ngcore
|
|||||||
|
|
||||||
|
|
||||||
template<int N>
|
template<int N>
|
||||||
class alignas(GetDefaultSIMDSize()*sizeof(double)) SIMD<double, N>
|
class alignas(GetLargestNativeSIMDPart(N)*sizeof(double)) SIMD<double, N>
|
||||||
{
|
{
|
||||||
static constexpr int N1 = std::min(GetDefaultSIMDSize(), N/2);
|
static constexpr int N1 = GetLargestNativeSIMDPart(N);
|
||||||
static constexpr int N2 = N-N1;
|
static constexpr int N2 = N-N1;
|
||||||
|
|
||||||
SIMD<double, N1> lo;
|
SIMD<double, N1> lo;
|
||||||
@ -543,7 +565,7 @@ namespace ngcore
|
|||||||
|
|
||||||
|
|
||||||
template <int i, typename T, int N>
|
template <int i, typename T, int N>
|
||||||
T get(SIMD<T,N> a) { return a[i]; }
|
T get(SIMD<T,N> a) { return a.template Get<i>(); }
|
||||||
|
|
||||||
template <int NUM, typename FUNC>
|
template <int NUM, typename FUNC>
|
||||||
NETGEN_INLINE void Iterate2 (FUNC f)
|
NETGEN_INLINE void Iterate2 (FUNC f)
|
||||||
|
@ -86,6 +86,9 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
|
|||||||
SIMD () {}
|
SIMD () {}
|
||||||
SIMD (const SIMD &) = default;
|
SIMD (const SIMD &) = default;
|
||||||
SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); }
|
SIMD (double v0, double v1) { data = _mm_set_pd(v1,v0); }
|
||||||
|
SIMD (SIMD<double,1> v0, SIMD<double,1> v1)
|
||||||
|
: data{_mm_set_pd(v0.Data(), v1.Data())}
|
||||||
|
{ }
|
||||||
SIMD (std::array<double, 2> arr)
|
SIMD (std::array<double, 2> arr)
|
||||||
: data{_mm_set_pd(arr[1], arr[0])}
|
: data{_mm_set_pd(arr[1], arr[0])}
|
||||||
{}
|
{}
|
||||||
@ -137,6 +140,13 @@ NETGEN_INLINE SIMD<int64_t,2> operator- (SIMD<int64_t,2> a, SIMD<int64_t,2> b) {
|
|||||||
NETGEN_INLINE __m128d Data() const { return data; }
|
NETGEN_INLINE __m128d Data() const { return data; }
|
||||||
NETGEN_INLINE __m128d & Data() { return data; }
|
NETGEN_INLINE __m128d & Data() { return data; }
|
||||||
|
|
||||||
|
template <int I>
|
||||||
|
double Get()
|
||||||
|
{
|
||||||
|
static_assert(I>=0 && I<2, "Index out of range");
|
||||||
|
return (*this)[I];
|
||||||
|
}
|
||||||
|
|
||||||
operator std::tuple<double&,double&> ()
|
operator std::tuple<double&,double&> ()
|
||||||
{
|
{
|
||||||
auto pdata = (double*)&data;
|
auto pdata = (double*)&data;
|
||||||
|
Loading…
Reference in New Issue
Block a user