Fix signedness for ARM Neon mask type

vbslq_f64 and vandq_u64 both require uint64x2_t types as mask arguments, and the Neon intrinsics do not allow for implicit conversion. Fixes build errors with current GCC 11.2.1:

/home/abuild/rpmbuild/BUILD/netgen-6.2.2105/libsrc/core/simd_arm64.hpp:171:29: error: cannot convert '__Int64x2_t' to 'uint64x2_t'
  171 |     return vandq_u64 (a.Data(), b.Data());
2025-04-13 00:27:28 +05:00 · 2022-01-27 12:40:39 +01:00 · 2022-01-27 12:40:39 +01:00 · 5fce0b48b8
commit 5fce0b48b8
parent 86fd2983c7
1 changed file with 9 additions and 4 deletions
--- a/libsrc/core/simd_arm64.hpp
+++ b/libsrc/core/simd_arm64.hpp
@@ -14,9 +14,10 @@ namespace ngcore
      mask[1] = i > 1 ? -1 : 0;
    }
-    SIMD (bool i0, bool i1) { mask[0] = i0 ? -1:0; mask[1] = i1 ? -1 : 0; }
+    SIMD (bool i0, bool i1) { mask[0] = i0 ? -1 : 0; mask[1] = i1 ? -1 : 0; }
    SIMD (SIMD<mask64,1> i0, SIMD<mask64,1> i1) { mask[0] = i0[0]; mask[1] = i1[0]; }
-    SIMD (float64x2_t _data) : mask{_data} { }
+    // SIMD (float64x2_t _data) : mask{_data} { }
    SIMD (int64x2_t _data) : mask{_data} { }
    auto Data() const { return mask; }
    static constexpr int Size() { return 2; }
    // static NETGEN_INLINE SIMD<mask64, 2> GetMaskFromBits (unsigned int i);
@@ -159,7 +160,8 @@ namespace ngcore
  NETGEN_INLINE SIMD<double,2> If (SIMD<mask64,2> a, SIMD<double,2> b, SIMD<double,2> c)
  {
    // return { a[0] ? b[0] : c[0], a[1] ? b[1] : c[1] };
-    return vbslq_f64(a.Data(), b.Data(), c.Data());
+    uint64x2_t mask = vreinterpretq_u64_s64(a.Data());
+    return vbslq_f64(mask, b.Data(), c.Data());
  }
  NETGEN_INLINE SIMD<int64_t,2> If (SIMD<mask64,2> a, SIMD<int64_t,2> b, SIMD<int64_t,2> c)
  {
@@ -168,7 +170,10 @@ namespace ngcore
  NETGEN_INLINE SIMD<mask64,2> operator&& (SIMD<mask64,2> a, SIMD<mask64,2> b)
  {
-    return vandq_u64 (a.Data(), b.Data());
+    uint64x2_t m1 = vreinterpretq_u64_s64(a.Data());
+    uint64x2_t m2 = vreinterpretq_u64_s64(b.Data());
+    uint64x2_t res = vandq_u64 (m1, m2);
+    return vreinterpretq_s64_u64(res);
  }
 }