From 85e8c09ff6626b12480f4919a26a7086d4c20579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Wed, 3 Mar 2021 17:03:29 +0100 Subject: [PATCH] Fix GetTimeCounter for Aarch64 variants Neither GCC nor Clang define an __arm64__ preprocessor macro, but use __aarch64__ (MSVC uses _M_ARM64). Add a "64" suffix to the define, i.e. NETGEN_ARCH_ARM64 to make it more obvious it only refers to aarch64, and to be in line with NETGEN_ARCH_AMD64. Replace the (Clang specific) __builtin_readcyclecounter with inline asm: - The function returns cycles (i.e. varies with CPU frequency), not time - It may return 0, depending on the PMU settings - It may cause an illegal instruction, in case it is not trapped by the kernel, e.g. on FreeBSD. Reading the generic timer/counter CNTVCT_EL0 instead of PMCCNTR_EL0 avoids these pitfalls. The inline asm works on GCC and Clang, instead of Clang only for the builtin. --- libsrc/core/ngcore_api.hpp | 6 +++++- libsrc/core/simd.hpp | 2 +- libsrc/core/utils.hpp | 11 +++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/libsrc/core/ngcore_api.hpp b/libsrc/core/ngcore_api.hpp index 330e7e33..9c977c1c 100644 --- a/libsrc/core/ngcore_api.hpp +++ b/libsrc/core/ngcore_api.hpp @@ -71,7 +71,11 @@ #define NETGEN_ARCH_AMD64 #endif -#if defined(__arm64__) || defined(_M_ARM64) +#if defined(__aarch64__) || defined(_M_ARM64) +#define NETGEN_ARCH_ARM64 +#endif + +#if defined(__arm__) || defined(_M_ARM) #define NETGEN_ARCH_ARM #endif diff --git a/libsrc/core/simd.hpp b/libsrc/core/simd.hpp index e809d6fe..3459e66d 100644 --- a/libsrc/core/simd.hpp +++ b/libsrc/core/simd.hpp @@ -26,7 +26,7 @@ #include "simd_avx512.hpp" #endif -#ifdef __arm64__ +#ifdef __aarch64__ #include "simd_arm64.hpp" #endif diff --git a/libsrc/core/utils.hpp b/libsrc/core/utils.hpp index ca015ae3..102ff319 100644 --- a/libsrc/core/utils.hpp +++ b/libsrc/core/utils.hpp @@ -10,7 +10,7 @@ #include "ngcore_api.hpp" // for NGCORE_API and CPU arch macros -#if
defined(__APPLE__) && defined(NETGEN_ARCH_ARM) +#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64) #include #endif @@ -58,12 +58,15 @@ namespace ngcore inline TTimePoint GetTimeCounter() noexcept { -#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM) +#if defined(__APPLE__) && defined(NETGEN_ARCH_ARM64) return mach_absolute_time(); #elif defined(NETGEN_ARCH_AMD64) return __rdtsc(); -#elif defined(NETGEN_ARCH_ARM) - return __builtin_readcyclecounter(); +#elif defined(NETGEN_ARCH_ARM64) && defined(__GNUC__) + // __GNUC__ is also defined by CLANG. Use inline asm to read Generic Timer + unsigned long long tics; + __asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (tics)); + return tics; #else #warning "Unsupported CPU architecture" return 0;