Support for Apple M1

2025-06-06 10:17:49 +05:00 · 2020-12-15 10:12:30 +01:00 · 2020-12-15 10:12:30 +01:00 · dbe894fea3
commit dbe894fea3
parent 1b55c51da5
5 changed files with 96 additions and 35 deletions
--- a/libsrc/core/ngcore_api.hpp
+++ b/libsrc/core/ngcore_api.hpp
@ -67,6 +67,14 @@
  #endif
 #endif

+#if defined(__amd64__) || defined(_M_AMD64)
+#define NETGEN_ARCH_AMD64
+#endif
+
+#if defined(__arm64__) || defined(_M_ARM64)
+#define NETGEN_ARCH_ARM
+#endif
+
 #ifdef __MAC_OS_X_VERSION_MIN_REQUIRED
 #if __MAC_OS_X_VERSION_MIN_REQUIRED < 101400
 // The c++ standard library on MacOS 10.13 and earlier has no aligned new operator,
--- a/libsrc/core/simd.hpp
+++ b/libsrc/core/simd.hpp
@ -11,7 +11,7 @@

 #include "simd_generic.hpp"

-#if (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE__))
+#ifdef NETGEN_ARCH_AMD64
 #ifndef __SSE__
 #define __SSE__
 #endif
@ -28,6 +28,7 @@

 namespace ngcore
 {
+#ifdef NETGEN_ARCH_AMD64
  NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4)
  {
    SIMD<double,2> hsum1 = my_mm_hadd_pd (v1.Data(), v2.Data());
@ -35,6 +36,12 @@ namespace ngcore
    return SIMD<double,4> (hsum1, hsum2);
  }

+  NETGEN_INLINE auto GetMaskFromBits( unsigned int i )
+  {
+    return SIMD<mask64>::GetMaskFromBits(i);
+  }
+#endif
+

  NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD <double,4> a3, SIMD<double,4> a4,
                             SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4)
@ -59,11 +66,6 @@ namespace ngcore
  {
    return SIMD<double,4>(HSum(s1), HSum(s2), HSum(s3), HSum(s4));
  }
-
-  NETGEN_INLINE auto GetMaskFromBits( unsigned int i )
-  {
-    return SIMD<mask64>::GetMaskFromBits(i);
-  }
 }

 #endif // NETGEN_CORE_SIMD_HPP
--- a/libsrc/core/simd_generic.hpp
+++ b/libsrc/core/simd_generic.hpp
@ -21,10 +21,10 @@ namespace ngcore
    return 8;
 #elif defined __AVX__
    return 4;
-#elif (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE__))
+#elif defined NETGEN_ARCH_AMD64
    return 2;
 #else
-    return 1;
+    return 2;
 #endif
  }

@ -104,8 +104,10 @@ namespace ngcore
    SIMD () {}
    SIMD (const SIMD &) = default;
    SIMD & operator= (const SIMD &) = default;
-    SIMD (int64_t val) { data = val; }
-    SIMD (std::array<int64_t, 1> arr)
+    SIMD (int val) : data{val} {}
+    SIMD (int64_t val) : data{val} {}
+    SIMD (size_t val) : data(val) {}
+    explicit SIMD (std::array<int64_t, 1> arr)
        : data{arr[0]}
    {}

@ -136,16 +138,18 @@ namespace ngcore
    SIMD (const SIMD &) = default;
    SIMD & operator= (const SIMD &) = default;

+    SIMD (int val) : lo{val}, high{val} { ; }
    SIMD (int64_t val) : lo{val}, high{val} { ; }
+    SIMD (size_t val) : lo{val}, high{val} { ; }
    SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; }

-    SIMD( std::array<int64_t, N> arr )
+    explicit SIMD( std::array<int64_t, N> arr )
        : lo(detail::array_range<N1>(arr, 0)),
          high(detail::array_range<N2>(arr, N1))
      {}

    template<typename ...T>
-    SIMD(const T... vals)
+    explicit SIMD(const T... vals)
    : lo(detail::array_range<N1>(std::array<int64_t, N>{vals...}, 0)),
      high(detail::array_range<N2>(std::array<int64_t, N>{vals...}, N1))
      {
@ -204,7 +208,7 @@ namespace ngcore
    SIMD (size_t val) { data = val; }
    SIMD (double const * p) { data = *p; }
    SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; }
-    SIMD (std::array<double, 1> arr)
+    explicit SIMD (std::array<double, 1> arr)
        : data{arr[0]}
    {}

@ -253,19 +257,17 @@ namespace ngcore
    template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
    SIMD (const T & func)
    {
-      for(auto i : IntRange(N1))
-          lo[i] = func(i);
-      for(auto i : IntRange(N2))
-          high[i] = func(N1+i);
+      double  *p = (double*)this;
+      for(auto i : IntRange(N))
+          p[i] = func(i);
    }

    template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
    SIMD & operator= (const T & func)
    {
-      for(auto i : IntRange(N1))
-          lo[i] = func(i);
-      for(auto i : IntRange(N2))
-          high[i] = func(N1+i);
+      double  *p = (double*)this;
+      for(auto i : IntRange(N))
+          p[i] = func(i);
      return *this;
    }

@ -285,13 +287,13 @@ namespace ngcore
        : lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
      { }

-    SIMD( std::array<double, N> arr )
+    explicit SIMD( std::array<double, N> arr )
        : lo(detail::array_range<N1>(arr, 0)),
          high(detail::array_range<N2>(arr, N1))
      {}

    template<typename ...T>
-    SIMD(const T... vals)
+    explicit SIMD(const T... vals)
    : lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
      high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
      {
@ -312,7 +314,10 @@ namespace ngcore

    template<typename=std::enable_if<N==2>>
    operator std::tuple<double&,double&> ()
-    { return std::tuple<double&,double&>((*this)[0], (*this)[1]); }
+    { 
+	double *p = (double*)this;
+	return std::tuple<double&,double&>(p[0], p[1]); 
+    }

    template<typename=std::enable_if<N==4>>
    operator std::tuple<double&,double&,double&,double&> ()
@ -325,6 +330,7 @@ namespace ngcore
      if constexpr(I<N1) return lo.template Get<I>();
      else               return high.template Get<I-N1>();
    }
+    auto Data() const { return *this; }
  };


@ -359,42 +365,42 @@ namespace ngcore
  }

  template <typename T, int N>
-  NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> & a, SIMD<T,N> b)
+  NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> a, SIMD<T,N> b)
    {
      if constexpr(N==1) return a.Data() < b.Data();
      else               return { a.Lo()<b.Lo(), a.Hi()<b.Hi() };
    }

  template <typename T, int N>
-  NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> & a, SIMD<T,N> b)
+  NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> a, SIMD<T,N> b)
    {
      if constexpr(N==1) return a.Data() <= b.Data();
      else               return { a.Lo()<=b.Lo(), a.Hi()<=b.Hi() };
    }

  template <typename T, int N>
-  NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> & a, SIMD<T,N> b)
+  NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> a, SIMD<T,N> b)
    {
      if constexpr(N==1) return a.Data() > b.Data();
      else               return { a.Lo()>b.Lo(), a.Hi()>b.Hi() };
    }

  template <typename T, int N>
-  NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> & a, SIMD<T,N> b)
+  NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> a, SIMD<T,N> b)
    {
      if constexpr(N==1) return a.Data() >= b.Data();
      else               return { a.Lo()>=b.Lo(), a.Hi()>=b.Hi() };
    }

  template <typename T, int N>
-  NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> & a, SIMD<T,N> b)
+  NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> a, SIMD<T,N> b)
    {
      if constexpr(N==1) return a.Data() == b.Data();
      else               return { a.Lo()==b.Lo(), a.Hi()==b.Hi() };
    }

  template <typename T, int N>
-  NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> & a, SIMD<T,N> b)
+  NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> a, SIMD<T,N> b)
    {
      if constexpr(N==1) return a.Data() != b.Data();
      else               return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
@ -547,6 +553,30 @@ namespace ngcore
    return ost;
  }

+  using std::sqrt;   // make std::sqrt visible for the per-element call inside the lambda
+  template <int N>   // element-wise square root of a SIMD<double,N>, for any width N
+  NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a) {
+    return ngcore::SIMD<double,N>([a](int i)->double { return sqrt(a[i]); } );   // result width must be N, not the default SIMD width
+  }
+
+  using std::fabs;   // make std::fabs visible for the per-element call inside the lambda
+  template <int N>   // element-wise absolute value of a SIMD<double,N>, for any width N
+  NETGEN_INLINE ngcore::SIMD<double,N> fabs (ngcore::SIMD<double,N> a) {
+    return ngcore::SIMD<double,N>([a](int i)->double { return fabs(a[i]); } );   // result width must be N, not the default SIMD width
+  }
+
+  using std::floor;   // make std::floor visible for the per-element call inside the lambda
+  template <int N>   // element-wise floor of a SIMD<double,N>, for any width N
+  NETGEN_INLINE ngcore::SIMD<double,N> floor (ngcore::SIMD<double,N> a) {
+    return ngcore::SIMD<double,N>([a](int i)->double { return floor(a[i]); } );   // result width must be N, not the default SIMD width
+  }
+
+  using std::ceil;   // make std::ceil visible for the per-element call inside the lambda
+  template <int N>   // element-wise ceiling of a SIMD<double,N>, for any width N
+  NETGEN_INLINE ngcore::SIMD<double,N> ceil (ngcore::SIMD<double,N> a) {
+    return ngcore::SIMD<double,N>([a](int i)->double { return ceil(a[i]); } );   // result width must be N, not the default SIMD width
+  }
+
  using std::exp;
  template <int N>
  NETGEN_INLINE ngcore::SIMD<double,N> exp (ngcore::SIMD<double,N> a) {
--- a/libsrc/core/taskmanager.cpp
+++ b/libsrc/core/taskmanager.cpp
@ -201,14 +201,14 @@ namespace ngcore
      ;
  }

-  static size_t calibrate_init_tsc = __rdtsc();
+  static size_t calibrate_init_tsc = GetTimeCounter();
  typedef std::chrono::system_clock TClock;
  static TClock::time_point calibrate_init_clock = TClock::now();
  
  void TaskManager :: StopWorkers()
  {
    done = true;
-    double delta_tsc = __rdtsc()-calibrate_init_tsc;
+    double delta_tsc = GetTimeCounter()-calibrate_init_tsc;
    double delta_sec = std::chrono::duration<double>(TClock::now()-calibrate_init_clock).count();
    double frequ = (delta_sec != 0) ? delta_tsc/delta_sec : 2.7e9;
    
@ -421,7 +421,11 @@ namespace ngcore
      if (workers_on_node[j])
        {
          while (complete[j] != jobnr)
+          {
+#ifdef NETGEN_ARCH_AMD64
            _mm_pause();
+#endif // NETGEN_ARCH_AMD64
+          }
        }

    func = nullptr;
--- a/libsrc/core/utils.hpp
+++ b/libsrc/core/utils.hpp
@ -8,13 +8,19 @@
 #include <sstream>
 #include <string>

+#include "ngcore_api.hpp"       // for NGCORE_API and CPU arch macros
+
+#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
+#include <mach/mach_time.h>
+#endif
+
+#ifdef NETGEN_ARCH_AMD64
 #ifdef WIN32
 #include <intrin.h>   // for __rdtsc()  CPU time stamp counter
 #else
 #include <x86intrin.h>   // for __rdtsc()  CPU time stamp counter
 #endif // WIN32
-
-#include "ngcore_api.hpp"       // for NGCORE_API
+#endif // NETGEN_ARCH_AMD64

 namespace ngcore
 {
@ -52,7 +58,16 @@ namespace ngcore

  inline TTimePoint GetTimeCounter() noexcept
  {
-      return TTimePoint(__rdtsc());
+#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
+    return mach_absolute_time();
+#elif defined(NETGEN_ARCH_AMD64)
+    return __rdtsc();
+#elif defined(NETGEN_ARCH_ARM)
+    return __builtin_readcyclecounter();
+#else
+#warning "Unsupported CPU architecture"
+    return 0;
+#endif
  }

  template <class T>
@ -161,7 +176,9 @@ namespace ngcore
      while (!m.compare_exchange_weak(should, true))
        {
          should = false;
+#ifdef NETGEN_ARCH_AMD64
          _mm_pause();
+#endif // NETGEN_ARCH_AMD64
        }
    }
    void unlock()