Fix GetTimeCounter for Aarch64 variants

Neither GCC nor Clang defines an __arm64__ preprocessor macro; both use
__aarch64__ (MSVC uses _M_ARM64). Add a "64" suffix to the define, i.e.
NETGEN_ARCH_ARM64, to make it more obvious that it only refers to aarch64,
and to be in line with NETGEN_ARCH_AMD64.

Replace the (Clang-specific) __builtin_readcyclecounter with inline
asm, because the builtin has several problems:
- The function returns cycles (i.e. varies with CPU frequency), not time
- It may return 0, depending on the PMU settings
- It may cause an illegal instruction, in case it is not trapped by the
  kernel, e.g. on FreeBSD.

Reading the generic timer/counter CNTVCT_EL0 instead of PMCCNTR_EL0 avoids
these pitfalls. The inline asm works on both GCC and Clang, whereas the
builtin is Clang-only.
This commit is contained in:
Stefan Brüns 2021-03-03 17:03:29 +01:00
parent 3397e3bb9d
commit 85e8c09ff6
3 changed files with 13 additions and 6 deletions

View File

@ -71,7 +71,11 @@
#define NETGEN_ARCH_AMD64 #define NETGEN_ARCH_AMD64
#endif #endif
#if defined(__arm64__) || defined(_M_ARM64) #if defined(__aarch64__) || defined(_M_ARM64)
#define NETGEN_ARCH_ARM64
#endif
#if defined(__arm__) || defined(_M_ARM)
#define NETGEN_ARCH_ARM #define NETGEN_ARCH_ARM
#endif #endif

View File

@ -26,7 +26,7 @@
#include "simd_avx512.hpp" #include "simd_avx512.hpp"
#endif #endif
#ifdef __arm64__ #ifdef __aarch64__
#include "simd_arm64.hpp" #include "simd_arm64.hpp"
#endif #endif

View File

@ -10,7 +10,7 @@
#include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros #include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM) #if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64)
#include <mach/mach_time.h> #include <mach/mach_time.h>
#endif #endif
@ -58,12 +58,15 @@ namespace ngcore
inline TTimePoint GetTimeCounter() noexcept inline TTimePoint GetTimeCounter() noexcept
{ {
#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM) #if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64)
return mach_absolute_time(); return mach_absolute_time();
#elif defined(NETGEN_ARCH_AMD64) #elif defined(NETGEN_ARCH_AMD64)
return __rdtsc(); return __rdtsc();
#elif defined(NETGEN_ARCH_ARM) #elif defined(NETGEN_ARCH_ARM64) && defined(__GNUC__)
return __builtin_readcyclecounter(); // __GNUC__ is also defined by CLANG. Use inline asm to read Generic Timer
unsigned long long tics;
__asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (tics));
return tics;
#else #else
#warning "Unsupported CPU architecture" #warning "Unsupported CPU architecture"
return 0; return 0;