merge amd64 and arm code

This commit is contained in:
Matthias Hochsteger 2020-12-07 10:58:39 +01:00
parent 82f817946f
commit fc835e25a2
5 changed files with 32 additions and 23 deletions

View File

@ -197,7 +197,6 @@ if(HAVE_DLFCN_H)
endif()
include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
include_directories(BEFORE /usr/aarch64-linux-gnu/include)
#######################################################################
# platform specific definitions and flags

View File

@ -67,6 +67,14 @@
#endif
#endif
// Target CPU architecture detection.
// Exactly one of the macros below is expected to end up defined on a supported
// target; other headers test them to choose architecture-specific includes and
// code paths instead of probing compiler-specific macros directly.

// 64-bit x86: __amd64__ / _M_AMD64 are the compiler-predefined macros checked.
#if defined(__amd64__) || defined(_M_AMD64)
#define NETGEN_ARCH_AMD64
#endif
// ARM: __arm__ / _M_ARM are the compiler-predefined macros checked.
// NOTE(review): these look like the 32-bit ARM macros only; 64-bit ARM
// toolchains usually predefine __aarch64__ / _M_ARM64 instead - confirm
// whether such targets are meant to set NETGEN_ARCH_ARM as well.
#if defined(__arm__) || defined(_M_ARM)
#define NETGEN_ARCH_ARM
#endif
#ifdef __MAC_OS_X_VERSION_MIN_REQUIRED
#if __MAC_OS_X_VERSION_MIN_REQUIRED < 101400
// The c++ standard library on MacOS 10.13 and earlier has no aligned new operator,

View File

@ -7,7 +7,10 @@
#include <ostream>
#include <sstream>
#include <string>
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
#include <mach/mach_time.h>
#endif
/*
#ifdef WIN32
@ -55,11 +58,16 @@ namespace ngcore
inline TTimePoint GetTimeCounter() noexcept
{
  // Reads a raw tick counter from the platform and returns it as a TTimePoint.
  // The source of the counter is selected at compile time:
  //   - Apple on ARM      : mach_absolute_time()
  //   - AMD64             : __rdtsc()
  //   - any other ARM     : __builtin_readcyclecounter()
  //   - everything else   : compile-time warning, the counter is always 0
#if defined(NETGEN_ARCH_ARM) && defined(__APPLE__)
  const TTimePoint ticks = mach_absolute_time();
#elif defined(NETGEN_ARCH_AMD64)
  const TTimePoint ticks = __rdtsc();
#elif defined(NETGEN_ARCH_ARM)
  const TTimePoint ticks = __builtin_readcyclecounter();
#else
#warning "Unsupported CPU architecture"
  const TTimePoint ticks = 0;
#endif
  return ticks;
}
template <class T>

View File

@ -6,7 +6,6 @@
/* Date: 25. Mar. 16 */
/**************************************************************************/
// #include <immintrin.h>
#include <tuple>
#include <ostream>
#include <stdexcept>
@ -15,6 +14,10 @@
#include <core/utils.hpp>
#ifdef NETGEN_ARCH_AMD64
#include <immintrin.h>
#endif // NETGEN_ARCH_AMD64
#ifdef WIN32
#ifndef AVX_OPERATORS_DEFINED
#define AVX_OPERATORS_DEFINED
@ -50,23 +53,12 @@ NG_INLINE __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; }
namespace ngsimd
{
// MSVC does not define the __SSE__ / __SSE2__ macros, although SSE is always
// present on 64-bit x86 CPUs. When building for x64 under MSVC
// (_M_AMD64 / _M_X64) or whenever AVX is enabled (__AVX__), define both macros
// here - unless they already exist - so that `#ifdef __SSE__` checks further
// down take the SSE code paths.
#if (defined(_M_AMD64) || defined(_M_X64) || defined(__AVX__))
#ifndef __SSE__
#define __SSE__
#endif
#ifndef __SSE2__
#define __SSE2__
#endif
#endif
constexpr int GetDefaultSIMDSize() {
#if defined __AVX512F__
return 8;
#elif defined __AVX__
return 4;
#elif defined __SSE__
#elif defined NETGEN_ARCH_AMD64
return 2;
#else
return 1;
@ -74,12 +66,15 @@ namespace ngsimd
}
#if defined __AVX512F__
#define NETGEN_HAVE_SIMD
typedef __m512 tAVX;
typedef __m512d tAVXd;
#elif defined __AVX__
#define NETGEN_HAVE_SIMD
typedef __m256 tAVX;
typedef __m256d tAVXd;
#elif defined __SSE__
#elif defined NETGEN_ARCH_AMD64
#define NETGEN_HAVE_SIMD
typedef __m128 tAVX;
typedef __m128d tAVXd;
#endif
@ -256,7 +251,7 @@ using std::fabs;
/////////////////////////////////////////////////////////////////////////////
// SSE - Simd width 2
/////////////////////////////////////////////////////////////////////////////
#ifdef __SSE__
#ifdef NETGEN_ARCH_AMD64
template<>
class alignas(16) SIMD<double,2>
{
@ -349,7 +344,7 @@ using std::fabs;
return SIMD<double,4>(hsum);
}
*/
#endif // __SSE__
#endif // NETGEN_ARCH_AMD64

View File

@ -779,8 +779,7 @@ namespace netgen
#ifdef __SSE__
#include <immintrin.h>
#ifdef NETGEN_HAVE_SIMD
template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<1,1> (int elnr, int npts,