mirror of
https://github.com/NGSolve/netgen.git
synced 2025-01-11 21:50:34 +05:00
Fix GetTimeCounter for Aarch64 variants
Neither GCC nor Clang defines an __arm64__ preprocessor macro; both use __aarch64__ (MSVC uses _M_ARM64). Add a "64" suffix to the define, i.e. NETGEN_ARCH_ARM64, to make it more obvious that it only refers to aarch64, and to be in line with NETGEN_ARCH_AMD64. Replace the (Clang-specific) __builtin_readcyclecounter with inline asm, because the builtin has several pitfalls: - The function returns cycles (i.e. the value varies with CPU frequency), not time - It may return 0, depending on the PMU settings - It may cause an illegal instruction exception in case the access is not trapped by the kernel, e.g. on FreeBSD. Reading the generic timer/counter CNTVCT_EL0 instead of PMCCNTR_EL0 avoids these pitfalls. The inline asm works on both GCC and Clang, whereas the builtin is Clang-only.
This commit is contained in:
parent
3397e3bb9d
commit
85e8c09ff6
@ -71,7 +71,11 @@
|
|||||||
#define NETGEN_ARCH_AMD64
|
#define NETGEN_ARCH_AMD64
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__arm64__) || defined(_M_ARM64)
|
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||||
|
#define NETGEN_ARCH_ARM64
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__arm__) || defined(_M_ARM)
|
||||||
#define NETGEN_ARCH_ARM
|
#define NETGEN_ARCH_ARM
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@
|
|||||||
#include "simd_avx512.hpp"
|
#include "simd_avx512.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __arm64__
|
#ifdef __aarch64__
|
||||||
#include "simd_arm64.hpp"
|
#include "simd_arm64.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
#include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros
|
#include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros
|
||||||
|
|
||||||
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
|
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64)
|
||||||
#include <mach/mach_time.h>
|
#include <mach/mach_time.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -58,12 +58,15 @@ namespace ngcore
|
|||||||
|
|
||||||
inline TTimePoint GetTimeCounter() noexcept
|
inline TTimePoint GetTimeCounter() noexcept
|
||||||
{
|
{
|
||||||
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM)
|
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64)
|
||||||
return mach_absolute_time();
|
return mach_absolute_time();
|
||||||
#elif defined(NETGEN_ARCH_AMD64)
|
#elif defined(NETGEN_ARCH_AMD64)
|
||||||
return __rdtsc();
|
return __rdtsc();
|
||||||
#elif defined(NETGEN_ARCH_ARM)
|
#elif defined(NETGEN_ARCH_ARM64) && defined(__GNUC__)
|
||||||
return __builtin_readcyclecounter();
|
// __GNUC__ is also defined by CLANG. Use inline asm to read Generic Timer
|
||||||
|
unsigned long long tics;
|
||||||
|
__asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (tics));
|
||||||
|
return tics;
|
||||||
#else
|
#else
|
||||||
#warning "Unsupported CPU architecture"
|
#warning "Unsupported CPU architecture"
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user