diff --git a/libsrc/core/simd_arm64.hpp b/libsrc/core/simd_arm64.hpp index e3c70b8f..34cf1862 100644 --- a/libsrc/core/simd_arm64.hpp +++ b/libsrc/core/simd_arm64.hpp @@ -10,14 +10,13 @@ namespace ngcore public: SIMD (int i) { - mask[0] = i > 0; - mask[1] = i > 1; + mask[0] = i > 0 ? -1 : 0; + mask[1] = i > 1 ? -1 : 0; } - SIMD (bool i0, bool i1) { mask[0] = i0; mask[1] = i1; } + SIMD (bool i0, bool i1) { mask[0] = i0 ? -1:0; mask[1] = i1 ? -1 : 0; } SIMD (SIMD i0, SIMD i1) { mask[0] = i0[0]; mask[1] = i1[0]; } - - //SIMD (__m128i _mask) : mask(_mask) { ; } + SIMD (float64x2_t _data) : mask{_data} { } auto Data() const { return mask; } static constexpr int Size() { return 2; } // static NETGEN_INLINE SIMD GetMaskFromBits (unsigned int i); @@ -37,26 +36,21 @@ namespace ngcore static constexpr int Size() { return 2; } SIMD () {} SIMD (const SIMD &) = default; - SIMD (double v0, double v1) { data[0] = v0; data[1] = v1; } - - SIMD (std::array arr) - // : data{(arr[0], arr[1])} - { - data[0] = arr[0]; - data[1] = arr[1]; - } - + // SIMD (double v0, double v1) : data{v0,v1} { } + SIMD (double v0, double v1) : data{vcombine_f64(float64x1_t{v0}, float64x1_t{v1})} { } + SIMD (std::array arr) : data{arr[0], arr[1]} { } + SIMD & operator= (const SIMD &) = default; - SIMD (double val) { data[0] = data[1] = val; } - SIMD (int val) { data[0] = data[1] = double(val); } - SIMD (size_t val) { data[0] = data[1] = double(val); } + SIMD (double val) : data{val,val} { } + SIMD (int val) : data{double(val),double(val)} { } + SIMD (size_t val) : data{double(val),double(val)} { } SIMD (double const * p) { - // data = vld1q_f64(p); - data[0] = p[0]; - data[1] = p[1]; + data = vld1q_f64(p); + // data[0] = p[0]; + // data[1] = p[1]; } SIMD (double const * p, SIMD mask) @@ -75,9 +69,11 @@ namespace ngcore void Store (double * p) { - // vst1q_f64(p, data); + vst1q_f64(p, data); + /* p[0] = data[0]; p[1] = data[1]; + */ } void Store (double * p, SIMD mask) @@ -86,7 +82,8 @@ namespace ngcore if (mask[1]) p[1] = data[1]; } - NETGEN_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; } + // NETGEN_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; } + NETGEN_INLINE double operator[] (int i) const { return data[i]; } NETGEN_INLINE auto Data() const { return data; } NETGEN_INLINE auto & Data() { return data; } @@ -99,6 +96,7 @@ namespace ngcore double Lo() const { return data[0]; } double Hi() const { return data[1]; } + // __ai float64x1_t vget_high_f64(float64x2_t __p0) { }; @@ -132,15 +130,37 @@ namespace ngcore { return FNMA(SIMD (a), b, c); } + + NETGEN_INLINE SIMD operator+ (SIMD a, SIMD b) + { return a.Data()+b.Data(); } + + NETGEN_INLINE SIMD operator- (SIMD a, SIMD b) + { return a.Data()-b.Data(); } + NETGEN_INLINE SIMD operator- (SIMD a) + { return -a.Data(); } + + NETGEN_INLINE SIMD operator* (SIMD a, SIMD b) + { return a.Data()*b.Data(); } + + NETGEN_INLINE SIMD operator/ (SIMD a, SIMD b) + { return a.Data()/b.Data(); } + + NETGEN_INLINE SIMD If (SIMD a, SIMD b, SIMD c) { - return { a[0] ? b[0] : c[0], a[1] ? b[1] : c[1] }; + // return { a[0] ? b[0] : c[0], a[1] ? b[1] : c[1] }; + return vbslq_f64(a.Data(), b.Data(), c.Data()); } NETGEN_INLINE SIMD If (SIMD a, SIMD b, SIMD c) { return SIMD (a[0] ? b[0] : c[0], a[1] ? b[1] : c[1]); } + + NETGEN_INLINE SIMD operator&& (SIMD a, SIMD b) + { + return vandq_u64 (a.Data(), b.Data()); + } }