From 5fb91f26ed5ca964a105868fffa377efbb2a2b27 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Wed, 10 Jul 2019 13:21:57 +0200 Subject: [PATCH] Use AlignedAlloc for over-aligned types MacOS < 10.13 doesn't support new with alignment>16 bytes --- CMakeLists.txt | 1 + libsrc/core/taskmanager.hpp | 4 ++-- libsrc/core/utils.hpp | 29 +++++++++++++++++++++++++++++ libsrc/general/ngarray.hpp | 12 ++++++------ libsrc/general/ngsimd.hpp | 39 +++------------------------------------ 5 files changed, 41 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e899ac1..d573504d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,7 @@ endif (ADDITIONAL_PATHS) ####################################################################### # build options include_directories ("${PROJECT_SOURCE_DIR}/include") +include_directories ("${PROJECT_SOURCE_DIR}/libsrc") include_directories ("${PROJECT_SOURCE_DIR}/libsrc/include") include_directories ("${PROJECT_BINARY_DIR}") diff --git a/libsrc/core/taskmanager.hpp b/libsrc/core/taskmanager.hpp index e9056659..059a1678 100644 --- a/libsrc/core/taskmanager.hpp +++ b/libsrc/core/taskmanager.hpp @@ -40,7 +40,7 @@ namespace ngcore { // PajeTrace *trace; - class alignas(64) NodeData //: public AlignedAlloc + class alignas(64) NodeData : public AlignedAlloc { public: atomic start_cnt{0}; @@ -390,7 +390,7 @@ public: - class alignas(4096) AtomicRange //: public AlignedAlloc + class alignas(4096) AtomicRange : public AlignedAlloc { atomic begin; atomic end; diff --git a/libsrc/core/utils.hpp b/libsrc/core/utils.hpp index 4dca6c8b..35877d42 100644 --- a/libsrc/core/utils.hpp +++ b/libsrc/core/utils.hpp @@ -74,6 +74,35 @@ namespace ngcore b = std::move(temp); } + template + class AlignedAlloc + { + protected: + static void * aligned_malloc(size_t s) + { + // Assume 16 byte alignment of standard library + if(alignof(T)<=16) + return malloc(s); + else + return _mm_malloc(s, alignof(T)); + } + + static void 
aligned_free(void *p) + { + if(alignof(T)<=16) + free(p); + else + _mm_free(p); + } + + public: + void * operator new (size_t s, void *p) { return p; } + void * operator new (size_t s) { return aligned_malloc(s); } + void * operator new[] (size_t s) { return aligned_malloc(s); } + void operator delete (void * p) { aligned_free(p); } + void operator delete[] (void * p) { aligned_free(p); } + }; + } // namespace ngcore #endif // NETGEN_CORE_UTILS_HPP diff --git a/libsrc/general/ngarray.hpp b/libsrc/general/ngarray.hpp index 2c4aed21..4b66d65d 100644 --- a/libsrc/general/ngarray.hpp +++ b/libsrc/general/ngarray.hpp @@ -388,10 +388,10 @@ namespace netgen NgArray & operator= (NgArray && a2) { - Swap (data, a2.data); - Swap (size, a2.size); - Swap (allocsize, a2.allocsize); - Swap (ownmem, a2.ownmem); + ngcore::Swap (data, a2.data); + ngcore::Swap (size, a2.size); + ngcore::Swap (allocsize, a2.allocsize); + ngcore::Swap (ownmem, a2.ownmem); return *this; } @@ -745,8 +745,8 @@ namespace netgen if (i <= j) { - Swap (data[i], data[j]); - Swap (slave[i], slave[j]); + ngcore::Swap (data[i], data[j]); + ngcore::Swap (slave[i], slave[j]); i++; j--; } } diff --git a/libsrc/general/ngsimd.hpp b/libsrc/general/ngsimd.hpp index 97e92052..feba2523 100644 --- a/libsrc/general/ngsimd.hpp +++ b/libsrc/general/ngsimd.hpp @@ -13,6 +13,8 @@ #include #include +#include + #ifdef WIN32 #ifndef AVX_OPERATORS_DEFINED #define AVX_OPERATORS_DEFINED @@ -48,6 +50,7 @@ NG_INLINE __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; } namespace ngsimd { + using ngcore::AlignedAlloc; // MSVC does not define SSE. 
It's always present on 64bit cpus #if (defined(_M_AMD64) || defined(_M_X64) || defined(__AVX__)) @@ -121,42 +124,6 @@ namespace ngsimd NG_INLINE SIMD operator/ (SIMD a, T b) { return a / SIMD(b); } -#ifdef __AVX__ - template - class AlignedAlloc - { - protected: - static void * aligned_malloc(size_t s) - { - // Assume 16 byte alignment of standard library - if(alignof(T)<=16) - return malloc(s); - else - return _mm_malloc(s, alignof(T)); - } - - static void aligned_free(void *p) - { - if(alignof(T)<=16) - free(p); - else - _mm_free(p); - } - - public: - void * operator new (size_t s, void *p) { return p; } - void * operator new (size_t s) { return aligned_malloc(s); } - void * operator new[] (size_t s) { return aligned_malloc(s); } - void operator delete (void * p) { aligned_free(p); } - void operator delete[] (void * p) { aligned_free(p); } - }; -#else - // it's only a dummy without AVX - template - class AlignedAlloc { ; }; - -#endif - using std::sqrt; using std::fabs;