mirror of
https://github.com/NGSolve/netgen.git
synced 2025-01-12 14:10:34 +05:00
Support for Apple M1
This commit is contained in:
parent
1b55c51da5
commit
dbe894fea3
@ -67,6 +67,14 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__amd64__) || defined(_M_AMD64)
|
||||||
|
#define NETGEN_ARCH_AMD64
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__arm64__) || defined(_M_ARM64)
|
||||||
|
#define NETGEN_ARCH_ARM
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __MAC_OS_X_VERSION_MIN_REQUIRED
|
#ifdef __MAC_OS_X_VERSION_MIN_REQUIRED
|
||||||
#if __MAC_OS_X_VERSION_MIN_REQUIRED < 101400
|
#if __MAC_OS_X_VERSION_MIN_REQUIRED < 101400
|
||||||
// The c++ standard library on MacOS 10.13 and earlier has no aligned new operator,
|
// The c++ standard library on MacOS 10.13 and earlier has no aligned new operator,
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
|
|
||||||
#include "simd_generic.hpp"
|
#include "simd_generic.hpp"
|
||||||
|
|
||||||
#if (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE__))
|
#ifdef NETGEN_ARCH_AMD64
|
||||||
#ifndef __SSE__
|
#ifndef __SSE__
|
||||||
#define __SSE__
|
#define __SSE__
|
||||||
#endif
|
#endif
|
||||||
@ -28,6 +28,7 @@
|
|||||||
|
|
||||||
namespace ngcore
|
namespace ngcore
|
||||||
{
|
{
|
||||||
|
#ifdef NETGEN_ARCH_AMD64
|
||||||
NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4)
|
NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4)
|
||||||
{
|
{
|
||||||
SIMD<double,2> hsum1 = my_mm_hadd_pd (v1.Data(), v2.Data());
|
SIMD<double,2> hsum1 = my_mm_hadd_pd (v1.Data(), v2.Data());
|
||||||
@ -35,6 +36,12 @@ namespace ngcore
|
|||||||
return SIMD<double,4> (hsum1, hsum2);
|
return SIMD<double,4> (hsum1, hsum2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NETGEN_INLINE auto GetMaskFromBits( unsigned int i )
|
||||||
|
{
|
||||||
|
return SIMD<mask64>::GetMaskFromBits(i);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD <double,4> a3, SIMD<double,4> a4,
|
NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD <double,4> a3, SIMD<double,4> a4,
|
||||||
SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4)
|
SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4)
|
||||||
@ -59,11 +66,6 @@ namespace ngcore
|
|||||||
{
|
{
|
||||||
return SIMD<double,4>(HSum(s1), HSum(s2), HSum(s3), HSum(s4));
|
return SIMD<double,4>(HSum(s1), HSum(s2), HSum(s3), HSum(s4));
|
||||||
}
|
}
|
||||||
|
|
||||||
NETGEN_INLINE auto GetMaskFromBits( unsigned int i )
|
|
||||||
{
|
|
||||||
return SIMD<mask64>::GetMaskFromBits(i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // NETGEN_CORE_SIMD_HPP
|
#endif // NETGEN_CORE_SIMD_HPP
|
||||||
|
@ -21,10 +21,10 @@ namespace ngcore
|
|||||||
return 8;
|
return 8;
|
||||||
#elif defined __AVX__
|
#elif defined __AVX__
|
||||||
return 4;
|
return 4;
|
||||||
#elif (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE__))
|
#elif defined NETGEN_ARCH_AMD64
|
||||||
return 2;
|
return 2;
|
||||||
#else
|
#else
|
||||||
return 1;
|
return 2;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,8 +104,10 @@ namespace ngcore
|
|||||||
SIMD () {}
|
SIMD () {}
|
||||||
SIMD (const SIMD &) = default;
|
SIMD (const SIMD &) = default;
|
||||||
SIMD & operator= (const SIMD &) = default;
|
SIMD & operator= (const SIMD &) = default;
|
||||||
SIMD (int64_t val) { data = val; }
|
SIMD (int val) : data{val} {}
|
||||||
SIMD (std::array<int64_t, 1> arr)
|
SIMD (int64_t val) : data{val} {}
|
||||||
|
SIMD (size_t val) : data(val) {}
|
||||||
|
explicit SIMD (std::array<int64_t, 1> arr)
|
||||||
: data{arr[0]}
|
: data{arr[0]}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
@ -136,16 +138,18 @@ namespace ngcore
|
|||||||
SIMD (const SIMD &) = default;
|
SIMD (const SIMD &) = default;
|
||||||
SIMD & operator= (const SIMD &) = default;
|
SIMD & operator= (const SIMD &) = default;
|
||||||
|
|
||||||
|
SIMD (int val) : lo{val}, high{val} { ; }
|
||||||
SIMD (int64_t val) : lo{val}, high{val} { ; }
|
SIMD (int64_t val) : lo{val}, high{val} { ; }
|
||||||
|
SIMD (size_t val) : lo{val}, high{val} { ; }
|
||||||
SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; }
|
SIMD (SIMD<int64_t,N1> lo_, SIMD<int64_t,N2> high_) : lo(lo_), high(high_) { ; }
|
||||||
|
|
||||||
SIMD( std::array<int64_t, N> arr )
|
explicit SIMD( std::array<int64_t, N> arr )
|
||||||
: lo(detail::array_range<N1>(arr, 0)),
|
: lo(detail::array_range<N1>(arr, 0)),
|
||||||
high(detail::array_range<N2>(arr, N1))
|
high(detail::array_range<N2>(arr, N1))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
template<typename ...T>
|
template<typename ...T>
|
||||||
SIMD(const T... vals)
|
explicit SIMD(const T... vals)
|
||||||
: lo(detail::array_range<N1>(std::array<int64_t, N>{vals...}, 0)),
|
: lo(detail::array_range<N1>(std::array<int64_t, N>{vals...}, 0)),
|
||||||
high(detail::array_range<N2>(std::array<int64_t, N>{vals...}, N1))
|
high(detail::array_range<N2>(std::array<int64_t, N>{vals...}, N1))
|
||||||
{
|
{
|
||||||
@ -204,7 +208,7 @@ namespace ngcore
|
|||||||
SIMD (size_t val) { data = val; }
|
SIMD (size_t val) { data = val; }
|
||||||
SIMD (double const * p) { data = *p; }
|
SIMD (double const * p) { data = *p; }
|
||||||
SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; }
|
SIMD (double const * p, SIMD<mask64,1> mask) { data = mask.Data() ? *p : 0.0; }
|
||||||
SIMD (std::array<double, 1> arr)
|
explicit SIMD (std::array<double, 1> arr)
|
||||||
: data{arr[0]}
|
: data{arr[0]}
|
||||||
{}
|
{}
|
||||||
|
|
||||||
@ -253,19 +257,17 @@ namespace ngcore
|
|||||||
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
|
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
|
||||||
SIMD (const T & func)
|
SIMD (const T & func)
|
||||||
{
|
{
|
||||||
for(auto i : IntRange(N1))
|
double *p = (double*)this;
|
||||||
lo[i] = func(i);
|
for(auto i : IntRange(N))
|
||||||
for(auto i : IntRange(N2))
|
p[i] = func(i);
|
||||||
high[i] = func(N1+i);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
|
template <typename T, typename std::enable_if<std::is_convertible<T,std::function<double(int)>>::value,int>::type = 0>
|
||||||
SIMD & operator= (const T & func)
|
SIMD & operator= (const T & func)
|
||||||
{
|
{
|
||||||
for(auto i : IntRange(N1))
|
double *p = (double*)this;
|
||||||
lo[i] = func(i);
|
for(auto i : IntRange(N))
|
||||||
for(auto i : IntRange(N2))
|
p[i] = func(i);
|
||||||
high[i] = func(N1+i);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -285,13 +287,13 @@ namespace ngcore
|
|||||||
: lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
|
: lo{p, mask.Lo()}, high{p+N1, mask.Hi()}
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
SIMD( std::array<double, N> arr )
|
explicit SIMD( std::array<double, N> arr )
|
||||||
: lo(detail::array_range<N1>(arr, 0)),
|
: lo(detail::array_range<N1>(arr, 0)),
|
||||||
high(detail::array_range<N2>(arr, N1))
|
high(detail::array_range<N2>(arr, N1))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
template<typename ...T>
|
template<typename ...T>
|
||||||
SIMD(const T... vals)
|
explicit SIMD(const T... vals)
|
||||||
: lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
|
: lo(detail::array_range<N1>(std::array<double, N>{vals...}, 0)),
|
||||||
high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
|
high(detail::array_range<N2>(std::array<double, N>{vals...}, N1))
|
||||||
{
|
{
|
||||||
@ -312,7 +314,10 @@ namespace ngcore
|
|||||||
|
|
||||||
template<typename=std::enable_if<N==2>>
|
template<typename=std::enable_if<N==2>>
|
||||||
operator std::tuple<double&,double&> ()
|
operator std::tuple<double&,double&> ()
|
||||||
{ return std::tuple<double&,double&>((*this)[0], (*this)[1]); }
|
{
|
||||||
|
double *p = (double*)this;
|
||||||
|
return std::tuple<double&,double&>(p[0], p[1]);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename=std::enable_if<N==4>>
|
template<typename=std::enable_if<N==4>>
|
||||||
operator std::tuple<double&,double&,double&,double&> ()
|
operator std::tuple<double&,double&,double&,double&> ()
|
||||||
@ -325,6 +330,7 @@ namespace ngcore
|
|||||||
if constexpr(I<N1) return lo.template Get<I>();
|
if constexpr(I<N1) return lo.template Get<I>();
|
||||||
else return high.template Get<I-N1>();
|
else return high.template Get<I-N1>();
|
||||||
}
|
}
|
||||||
|
auto Data() const { return *this; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -359,42 +365,42 @@ namespace ngcore
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int N>
|
template <typename T, int N>
|
||||||
NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> & a, SIMD<T,N> b)
|
NETGEN_INLINE SIMD<mask64,N> operator< (SIMD<T,N> a, SIMD<T,N> b)
|
||||||
{
|
{
|
||||||
if constexpr(N==1) return a.Data() < b.Data();
|
if constexpr(N==1) return a.Data() < b.Data();
|
||||||
else return { a.Lo()<b.Lo(), a.Hi()<b.Hi() };
|
else return { a.Lo()<b.Lo(), a.Hi()<b.Hi() };
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int N>
|
template <typename T, int N>
|
||||||
NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> & a, SIMD<T,N> b)
|
NETGEN_INLINE SIMD<mask64,N> operator<= (SIMD<T,N> a, SIMD<T,N> b)
|
||||||
{
|
{
|
||||||
if constexpr(N==1) return a.Data() <= b.Data();
|
if constexpr(N==1) return a.Data() <= b.Data();
|
||||||
else return { a.Lo()<=b.Lo(), a.Hi()<=b.Hi() };
|
else return { a.Lo()<=b.Lo(), a.Hi()<=b.Hi() };
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int N>
|
template <typename T, int N>
|
||||||
NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> & a, SIMD<T,N> b)
|
NETGEN_INLINE SIMD<mask64,N> operator> (SIMD<T,N> a, SIMD<T,N> b)
|
||||||
{
|
{
|
||||||
if constexpr(N==1) return a.Data() > b.Data();
|
if constexpr(N==1) return a.Data() > b.Data();
|
||||||
else return { a.Lo()>b.Lo(), a.Hi()>b.Hi() };
|
else return { a.Lo()>b.Lo(), a.Hi()>b.Hi() };
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int N>
|
template <typename T, int N>
|
||||||
NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> & a, SIMD<T,N> b)
|
NETGEN_INLINE SIMD<mask64,N> operator>= (SIMD<T,N> a, SIMD<T,N> b)
|
||||||
{
|
{
|
||||||
if constexpr(N==1) return a.Data() >= b.Data();
|
if constexpr(N==1) return a.Data() >= b.Data();
|
||||||
else return { a.Lo()>=b.Lo(), a.Hi()>=b.Hi() };
|
else return { a.Lo()>=b.Lo(), a.Hi()>=b.Hi() };
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int N>
|
template <typename T, int N>
|
||||||
NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> & a, SIMD<T,N> b)
|
NETGEN_INLINE SIMD<mask64,N> operator== (SIMD<T,N> a, SIMD<T,N> b)
|
||||||
{
|
{
|
||||||
if constexpr(N==1) return a.Data() == b.Data();
|
if constexpr(N==1) return a.Data() == b.Data();
|
||||||
else return { a.Lo()==b.Lo(), a.Hi()==b.Hi() };
|
else return { a.Lo()==b.Lo(), a.Hi()==b.Hi() };
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int N>
|
template <typename T, int N>
|
||||||
NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> & a, SIMD<T,N> b)
|
NETGEN_INLINE SIMD<mask64,N> operator!= (SIMD<T,N> a, SIMD<T,N> b)
|
||||||
{
|
{
|
||||||
if constexpr(N==1) return a.Data() != b.Data();
|
if constexpr(N==1) return a.Data() != b.Data();
|
||||||
else return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
|
else return { a.Lo()!=b.Lo(), a.Hi()!=b.Hi() };
|
||||||
@ -547,6 +553,30 @@ namespace ngcore
|
|||||||
return ost;
|
return ost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using std::sqrt;
|
||||||
|
template <int N>
|
||||||
|
NETGEN_INLINE ngcore::SIMD<double,N> sqrt (ngcore::SIMD<double,N> a) {
|
||||||
|
return ngcore::SIMD<double>([a](int i)->double { return sqrt(a[i]); } );
|
||||||
|
}
|
||||||
|
|
||||||
|
using std::fabs;
|
||||||
|
template <int N>
|
||||||
|
NETGEN_INLINE ngcore::SIMD<double,N> fabs (ngcore::SIMD<double,N> a) {
|
||||||
|
return ngcore::SIMD<double>([a](int i)->double { return fabs(a[i]); } );
|
||||||
|
}
|
||||||
|
|
||||||
|
using std::floor;
|
||||||
|
template <int N>
|
||||||
|
NETGEN_INLINE ngcore::SIMD<double,N> floor (ngcore::SIMD<double,N> a) {
|
||||||
|
return ngcore::SIMD<double>([a](int i)->double { return floor(a[i]); } );
|
||||||
|
}
|
||||||
|
|
||||||
|
using std::ceil;
|
||||||
|
template <int N>
|
||||||
|
NETGEN_INLINE ngcore::SIMD<double,N> ceil (ngcore::SIMD<double,N> a) {
|
||||||
|
return ngcore::SIMD<double>([a](int i)->double { return ceil(a[i]); } );
|
||||||
|
}
|
||||||
|
|
||||||
using std::exp;
|
using std::exp;
|
||||||
template <int N>
|
template <int N>
|
||||||
NETGEN_INLINE ngcore::SIMD<double,N> exp (ngcore::SIMD<double,N> a) {
|
NETGEN_INLINE ngcore::SIMD<double,N> exp (ngcore::SIMD<double,N> a) {
|
||||||
|
@ -201,14 +201,14 @@ namespace ngcore
|
|||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t calibrate_init_tsc = __rdtsc();
|
static size_t calibrate_init_tsc = GetTimeCounter();
|
||||||
typedef std::chrono::system_clock TClock;
|
typedef std::chrono::system_clock TClock;
|
||||||
static TClock::time_point calibrate_init_clock = TClock::now();
|
static TClock::time_point calibrate_init_clock = TClock::now();
|
||||||
|
|
||||||
void TaskManager :: StopWorkers()
|
void TaskManager :: StopWorkers()
|
||||||
{
|
{
|
||||||
done = true;
|
done = true;
|
||||||
double delta_tsc = __rdtsc()-calibrate_init_tsc;
|
double delta_tsc = GetTimeCounter()-calibrate_init_tsc;
|
||||||
double delta_sec = std::chrono::duration<double>(TClock::now()-calibrate_init_clock).count();
|
double delta_sec = std::chrono::duration<double>(TClock::now()-calibrate_init_clock).count();
|
||||||
double frequ = (delta_sec != 0) ? delta_tsc/delta_sec : 2.7e9;
|
double frequ = (delta_sec != 0) ? delta_tsc/delta_sec : 2.7e9;
|
||||||
|
|
||||||
@ -421,7 +421,11 @@ namespace ngcore
|
|||||||
if (workers_on_node[j])
|
if (workers_on_node[j])
|
||||||
{
|
{
|
||||||
while (complete[j] != jobnr)
|
while (complete[j] != jobnr)
|
||||||
|
{
|
||||||
|
#ifdef NETGEN_ARCH_AMD64
|
||||||
_mm_pause();
|
_mm_pause();
|
||||||
|
#endif // NETGEN_ARCH_AMD64
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func = nullptr;
|
func = nullptr;
|
||||||
|
@ -8,13 +8,19 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros
|
||||||
|
|
||||||
|
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
|
||||||
|
#include <mach/mach_time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef NETGEN_ARCH_AMD64
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#include <intrin.h> // for __rdtsc() CPU time step counter
|
#include <intrin.h> // for __rdtsc() CPU time step counter
|
||||||
#else
|
#else
|
||||||
#include <x86intrin.h> // for __rdtsc() CPU time step counter
|
#include <x86intrin.h> // for __rdtsc() CPU time step counter
|
||||||
#endif // WIN32
|
#endif // WIN32
|
||||||
|
#endif // NETGEN_ARCH_AMD64
|
||||||
#include "ngcore_api.hpp" // for NGCORE_API
|
|
||||||
|
|
||||||
namespace ngcore
|
namespace ngcore
|
||||||
{
|
{
|
||||||
@ -52,7 +58,16 @@ namespace ngcore
|
|||||||
|
|
||||||
inline TTimePoint GetTimeCounter() noexcept
|
inline TTimePoint GetTimeCounter() noexcept
|
||||||
{
|
{
|
||||||
return TTimePoint(__rdtsc());
|
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
|
||||||
|
return mach_absolute_time();
|
||||||
|
#elif defined(NETGEN_ARCH_AMD64)
|
||||||
|
return __rdtsc();
|
||||||
|
#elif defined(NETGEN_ARCH_ARM)
|
||||||
|
return __builtin_readcyclecounter();
|
||||||
|
#else
|
||||||
|
#warning "Unsupported CPU architecture"
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -161,7 +176,9 @@ namespace ngcore
|
|||||||
while (!m.compare_exchange_weak(should, true))
|
while (!m.compare_exchange_weak(should, true))
|
||||||
{
|
{
|
||||||
should = false;
|
should = false;
|
||||||
|
#ifdef NETGEN_ARCH_AMD64
|
||||||
_mm_pause();
|
_mm_pause();
|
||||||
|
#endif // NETGEN_ARCH_AMD64
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void unlock()
|
void unlock()
|
||||||
|
Loading…
Reference in New Issue
Block a user