From 25011c8407188765c27972216be86a0d5a10152b Mon Sep 17 00:00:00 2001
From: Joachim Schoeberl
Date: Fri, 5 Feb 2021 11:59:03 +0100
Subject: [PATCH] arm-simd: HSum, tuple support

---
 libsrc/core/simd_arm64.hpp | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/libsrc/core/simd_arm64.hpp b/libsrc/core/simd_arm64.hpp
index 34cf1862..f2572d34 100644
--- a/libsrc/core/simd_arm64.hpp
+++ b/libsrc/core/simd_arm64.hpp
@@ -21,7 +21,10 @@ namespace ngcore
     static constexpr int Size() { return 2; }
     // static NETGEN_INLINE SIMD GetMaskFromBits (unsigned int i);
     int64_t operator[] (int i) const { return mask[i]; }
-
+
+    template <int I>
+    int64_t Get() const { return mask[I]; }
+
     auto Lo() const { return mask[0]; }
     auto Hi() const { return mask[1]; }
   };
@@ -84,31 +87,37 @@ namespace ngcore
 
     // NETGEN_INLINE double operator[] (int i) const { return ((double*)(&data))[i]; }
     NETGEN_INLINE double operator[] (int i) const { return data[i]; }
+    NETGEN_INLINE double & operator[] (int i) { return ((double*)&data)[i]; }
+
+    template <int I>
+    double Get() const { return data[I]; }
+
     NETGEN_INLINE auto Data() const { return data; }
     NETGEN_INLINE auto & Data() { return data; }
-
 
     operator std::tuple<double&,double&> ()
     {
       auto pdata = (double*)&data;
       return std::tuple<double&,double&>(pdata[0], pdata[1]);
     }
 
-    double Lo() const { return data[0]; }
-    double Hi() const { return data[1]; }
-    // __ai float64x1_t vget_high_f64(float64x2_t __p0) {
+    double Lo() const { return Get<0>(); } // data[0]; }
+    double Hi() const { return Get<1>(); } // data[1]; }
+    // double Hi() const { return vget_high_f64(data)[0]; }
   };
 
 
 
   NETGEN_INLINE double HSum (SIMD<double,2> sd)
   {
-    return sd[0]+sd[1];
+    return sd.Lo()+sd.Hi(); // sd[0]+sd[1];
   }
 
   NETGEN_INLINE SIMD<double,2> HSum (SIMD<double,2> a, SIMD<double,2> b)
   {
-    return SIMD<double,2> (a[0]+a[1], b[0]+b[1]);
+    // return SIMD<double,2> (a[0]+a[1], b[0]+b[1]);
+    return vpaddq_f64(a.Data(), b.Data());
+
   }
 
   // a*b+c