Support for Apple M1

This commit is contained in:
Matthias Hochsteger 2020-12-15 10:12:30 +01:00
parent 1b55c51da5
commit dbe894fea3
5 changed files with 96 additions and 35 deletions

View File

@ -67,6 +67,14 @@
#endif
#endif
#if defined(__amd64__) || defined(_M_AMD64)
#define NETGEN_ARCH_AMD64
#endif
#if defined(__arm64__) || defined(_M_ARM64)
#define NETGEN_ARCH_ARM
#endif
#ifdef __MAC_OS_X_VERSION_MIN_REQUIRED
#if __MAC_OS_X_VERSION_MIN_REQUIRED < 101400
// The c++ standard library on MacOS 10.13 and earlier has no aligned new operator,

View File

@ -11,7 +11,7 @@
#include "simd_generic.hpp"
#if (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE__))
#ifdef NETGEN_ARCH_AMD64
#ifndef __SSE__
#define __SSE__
#endif
@ -28,6 +28,7 @@
namespace ngcore
{
#ifdef NETGEN_ARCH_AMD64
NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4)
{
SIMD<double,2> hsum1 = my_mm_hadd_pd (v1.Data(), v2.Data());
@ -35,6 +36,12 @@ namespace ngcore
return SIMD<double,4> (hsum1, hsum2);
}
// Free-function shorthand for SIMD<mask64>::GetMaskFromBits.
// The argument i is forwarded unchanged and the resulting mask is returned by value.
NETGEN_INLINE auto GetMaskFromBits( unsigned int i )
{
  auto mask = SIMD<mask64>::GetMaskFromBits(i);
  return mask;
}
#endif
NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD <double,4> a3, SIMD<double,4> a4,
SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4)
@ -59,11 +66,6 @@ namespace ngcore
{
return SIMD<double,4>(HSum(s1), HSum(s2), HSum(s3), HSum(s4));
}
// Free-function shorthand for SIMD<mask64>::GetMaskFromBits.
// The argument i is forwarded unchanged and the resulting mask is returned by value.
NETGEN_INLINE auto GetMaskFromBits( unsigned int i )
{
  auto mask = SIMD<mask64>::GetMaskFromBits(i);
  return mask;
}
}
#endif // NETGEN_CORE_SIMD_HPP

View File

@ -21,10 +21,10 @@ namespace ngcore
return 8;
#elif defined __AVX__
return 4;
#elif (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE__))
#elif defined NETGEN_ARCH_AMD64
return 2;
#else
return 1;
return 2;
#endif
}
@ -104,8 +104,10 @@ namespace ngcore
SIMD () {}
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (int64_t val) { data = val; }
SIMD (std::array<int64_t, 1> arr)
SIMD (int val) : data{val} {}
SIMD (int64_t val) : data{val} {}
SIMD (size_t val) : data(val) {}
explicit SIMD (std::array<int64_t, 1> arr)
: data{arr[0]}
{}
@ -136,16 +138,18 @@ namespace ngcore
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (int val) : lo{val}, high{val} { ; }
SIMD (int64_t val) : lo{val}, high{val} { ; }
SIMD (size_t val) : lo{val}, high{val} { ; }
SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; }
SIMD( std::array<int64_t, N> arr )
explicit SIMD( std::array<int64_t, N> arr )
: lo(detail::array_range<N1>(arr, 0)),
high(detail::array_range<N2>(arr, N1))
{}
template<typename ...T>
SIMD(const T... vals)
explicit SIMD(const T... vals)
: lo(detail::array_range<N1>(std::array<int64_t, N>{vals...}, 0)),
high(detail::array_range<N2>(std::array<int64_t, N>{vals...}, N1))
{
@ -204,7 +208,7 @@ namespace ngcore
SIMD (size_t val) { data = val; }
SIMD (double const * p) { data = *p; }
SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; }
SIMD (std::array<double, 1> arr)
explicit SIMD (std::array<double, 1> arr)
: data{arr[0]}
{}
@ -253,19 +257,17 @@ namespace ngcore
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
SIMD (const T & func)
{
for(auto i : IntRange(N1))
lo[i] = func(i);
for(auto i : IntRange(N2))
high[i] = func(N1+i);
double *p = (double*)this;
for(auto i : IntRange(N))
p[i] = func(i);
}
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
SIMD & operator= (const T & func)
{
for(auto i : IntRange(N1))
lo[i] = func(i);
for(auto i : IntRange(N2))
high[i] = func(N1+i);
double *p = (double*)this;
for(auto i : IntRange(N))
p[i] = func(i);
return *this;
}
@ -285,13 +287,13 @@ namespace ngcore
: lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
{ }
SIMD( std::array<double, N> arr )
explicit SIMD( std::array<double, N> arr )
: lo(detail::array_range<N1>(arr, 0)),
high(detail::array_range<N2>(arr, N1))
{}
template<typename ...T>
SIMD(const T... vals)
explicit SIMD(const T... vals)
: lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
{
@ -312,7 +314,10 @@ namespace ngcore
template<typename=std::enable_if<N==2>>
operator std::tuple<double&,double&> ()
{ return std::tuple<double&,double&>((*this)[0], (*this)[1]); }
{
double *p = (double*)this;
return std::tuple<double&,double&>(p[0], p[1]);
}
template<typename=std::enable_if<N==4>>
operator std::tuple<double&,double&,double&,double&> ()
@ -325,6 +330,7 @@ namespace ngcore
if constexpr(I<N1) return lo.template Get<I>();
else return high.template Get<I-N1>();
}
auto Data() const { return *this; }
};
@ -359,42 +365,42 @@ namespace ngcore
}
template <typename T, int N>
NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> & a, SIMD<T,N> b)
NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> a, SIMD<T,N> b)
{
if constexpr(N==1) return a.Data() < b.Data();
else return { a.Lo()<b.Lo(), a.Hi()<b.Hi() };
}
template <typename T, int N>
NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> & a, SIMD<T,N> b)
NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> a, SIMD<T,N> b)
{
if constexpr(N==1) return a.Data() <= b.Data();
else return { a.Lo()<=b.Lo(), a.Hi()<=b.Hi() };
}
template <typename T, int N>
NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> & a, SIMD<T,N> b)
NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> a, SIMD<T,N> b)
{
if constexpr(N==1) return a.Data() > b.Data();
else return { a.Lo()>b.Lo(), a.Hi()>b.Hi() };
}
template <typename T, int N>
NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> & a, SIMD<T,N> b)
NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> a, SIMD<T,N> b)
{
if constexpr(N==1) return a.Data() >= b.Data();
else return { a.Lo()>=b.Lo(), a.Hi()>=b.Hi() };
}
template <typename T, int N>
NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> & a, SIMD<T,N> b)
NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> a, SIMD<T,N> b)
{
if constexpr(N==1) return a.Data() == b.Data();
else return { a.Lo()==b.Lo(), a.Hi()==b.Hi() };
}
template <typename T, int N>
NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> & a, SIMD<T,N> b)
NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> a, SIMD<T,N> b)
{
if constexpr(N==1) return a.Data() != b.Data();
else return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
@ -547,6 +553,30 @@ namespace ngcore
return ost;
}
using std::sqrt;
// Lane-wise square root of a SIMD<double,N> value.
// Each result lane i is sqrt(a[i]); the result has the same width N as the argument.
template <int N>
NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a) {
  // Build the result with the explicit width N. Spelling it SIMD<double> would
  // presumably select the library's default lane count, which need not equal N,
  // so the generator would be asked for lanes that a does not have (or too few).
  // a[i] is used as a double here, so the inner call is expected to resolve to
  // the std::sqrt overload made visible by the using-declaration.
  return ngcore::SIMD<double,N>([a](int i)->double { return sqrt(a[i]); } );
}
using std::fabs;
// Lane-wise absolute value of a SIMD<double,N> value.
// Each result lane i is fabs(a[i]); the result has the same width N as the argument.
template <int N>
NETGEN_INLINE ngcore::SIMD<double,N> fabs (ngcore::SIMD<double,N> a) {
  // Build the result with the explicit width N. Spelling it SIMD<double> would
  // presumably select the library's default lane count, which need not equal N.
  // a[i] is used as a double here, so the inner call is expected to resolve to
  // the std::fabs overload made visible by the using-declaration.
  return ngcore::SIMD<double,N>([a](int i)->double { return fabs(a[i]); } );
}
using std::floor;
// Lane-wise floor of a SIMD<double,N> value.
// Each result lane i is floor(a[i]); the result has the same width N as the argument.
template <int N>
NETGEN_INLINE ngcore::SIMD<double,N> floor (ngcore::SIMD<double,N> a) {
  // Build the result with the explicit width N. Spelling it SIMD<double> would
  // presumably select the library's default lane count, which need not equal N.
  // a[i] is used as a double here, so the inner call is expected to resolve to
  // the std::floor overload made visible by the using-declaration.
  return ngcore::SIMD<double,N>([a](int i)->double { return floor(a[i]); } );
}
using std::ceil;
// Lane-wise ceiling of a SIMD<double,N> value.
// Each result lane i is ceil(a[i]); the result has the same width N as the argument.
template <int N>
NETGEN_INLINE ngcore::SIMD<double,N> ceil (ngcore::SIMD<double,N> a) {
  // Build the result with the explicit width N. Spelling it SIMD<double> would
  // presumably select the library's default lane count, which need not equal N.
  // a[i] is used as a double here, so the inner call is expected to resolve to
  // the std::ceil overload made visible by the using-declaration.
  return ngcore::SIMD<double,N>([a](int i)->double { return ceil(a[i]); } );
}
using std::exp;
template <int N>
NETGEN_INLINE ngcore::SIMD<double,N> exp (ngcore::SIMD<double,N> a) {

View File

@ -201,14 +201,14 @@ namespace ngcore
;
}
static size_t calibrate_init_tsc = __rdtsc();
static size_t calibrate_init_tsc = GetTimeCounter();
typedef std::chrono::system_clock TClock;
static TClock::time_point calibrate_init_clock = TClock::now();
void TaskManager :: StopWorkers()
{
done = true;
double delta_tsc = __rdtsc()-calibrate_init_tsc;
double delta_tsc = GetTimeCounter()-calibrate_init_tsc;
double delta_sec = std::chrono::duration<double>(TClock::now()-calibrate_init_clock).count();
double frequ = (delta_sec != 0) ? delta_tsc/delta_sec : 2.7e9;
@ -421,7 +421,11 @@ namespace ngcore
if (workers_on_node[j])
{
while (complete[j] != jobnr)
{
#ifdef NETGEN_ARCH_AMD64
_mm_pause();
#endif // NETGEN_ARCH_AMD64
}
}
func = nullptr;

View File

@ -8,13 +8,19 @@
#include <sstream>
#include <string>
#include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
#include <mach/mach_time.h>
#endif
#ifdef NETGEN_ARCH_AMD64
#ifdef WIN32
#include <intrin.h> // for __rdtsc() CPU time step counter
#else
#include <x86intrin.h> // for __rdtsc() CPU time step counter
#endif // WIN32
#include "ngcore_api.hpp" // for NGCORE_API
#endif // NETGEN_ARCH_AMD64
namespace ngcore
{
@ -52,7 +58,16 @@ namespace ngcore
inline TTimePoint GetTimeCounter() noexcept
{
return TTimePoint(__rdtsc());
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
return mach_absolute_time();
#elif defined(NETGEN_ARCH_AMD64)
return __rdtsc();
#elif defined(NETGEN_ARCH_ARM)
return __builtin_readcyclecounter();
#else
#warning "Unsupported CPU architecture"
return 0;
#endif
}
template <class T>
@ -161,7 +176,9 @@ namespace ngcore
while (!m.compare_exchange_weak(should, true))
{
should = false;
#ifdef NETGEN_ARCH_AMD64
_mm_pause();
#endif // NETGEN_ARCH_AMD64
}
}
void unlock()