Merge remote-tracking branch 'stefanbruens/fix_aarch64_uint_mask'

This commit is contained in:
mhochsteger@cerbsim.com 2022-03-16 10:27:43 +01:00
commit 94bd713310

View File

@ -14,9 +14,10 @@ namespace ngcore
mask[1] = i > 1 ? -1 : 0; mask[1] = i > 1 ? -1 : 0;
} }
SIMD (bool i0, bool i1) { mask[0] = i0 ? -1:0; mask[1] = i1 ? -1 : 0; } SIMD (bool i0, bool i1) { mask[0] = i0 ? -1 : 0; mask[1] = i1 ? -1 : 0; }
SIMD (SIMD<mask64,1> i0, SIMD<mask64,1> i1) { mask[0] = i0[0]; mask[1] = i1[0]; } SIMD (SIMD<mask64,1> i0, SIMD<mask64,1> i1) { mask[0] = i0[0]; mask[1] = i1[0]; }
SIMD (float64x2_t _data) : mask{_data} { } // SIMD (float64x2_t _data) : mask{_data} { }
SIMD (int64x2_t _data) : mask{_data} { }
auto Data() const { return mask; } auto Data() const { return mask; }
static constexpr int Size() { return 2; } static constexpr int Size() { return 2; }
// static NETGEN_INLINE SIMD<mask64, 2> GetMaskFromBits (unsigned int i); // static NETGEN_INLINE SIMD<mask64, 2> GetMaskFromBits (unsigned int i);
@ -165,7 +166,8 @@ namespace ngcore
NETGEN_INLINE SIMD<double,2> If (SIMD<mask64,2> a, SIMD<double,2> b, SIMD<double,2> c) NETGEN_INLINE SIMD<double,2> If (SIMD<mask64,2> a, SIMD<double,2> b, SIMD<double,2> c)
{ {
// return { a[0] ? b[0] : c[0], a[1] ? b[1] : c[1] }; // return { a[0] ? b[0] : c[0], a[1] ? b[1] : c[1] };
return vbslq_f64(a.Data(), b.Data(), c.Data()); uint64x2_t mask = vreinterpretq_u64_s64(a.Data());
return vbslq_f64(mask, b.Data(), c.Data());
} }
NETGEN_INLINE SIMD<int64_t,2> If (SIMD<mask64,2> a, SIMD<int64_t,2> b, SIMD<int64_t,2> c) NETGEN_INLINE SIMD<int64_t,2> If (SIMD<mask64,2> a, SIMD<int64_t,2> b, SIMD<int64_t,2> c)
{ {
@ -174,7 +176,10 @@ namespace ngcore
NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b) NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b)
{ {
return vandq_u64 (a.Data(), b.Data()); uint64x2_t m1 = vreinterpretq_u64_s64(a.Data());
uint64x2_t m2 = vreinterpretq_u64_s64(b.Data());
uint64x2_t res = vandq_u64 (m1, m2);
return vreinterpretq_s64_u64(res);
} }
} }