mirror of
https://github.com/NGSolve/netgen.git
synced 2025-01-27 13:20:34 +05:00
Use AlignedAlloc for over-aligned types
macOS < 10.13 doesn't support operator new with alignment > 16 bytes
This commit is contained in:
parent
c3a7fc2aab
commit
5fb91f26ed
@ -115,6 +115,7 @@ endif (ADDITIONAL_PATHS)
|
|||||||
#######################################################################
|
#######################################################################
|
||||||
# build options
|
# build options
|
||||||
include_directories ("${PROJECT_SOURCE_DIR}/include")
|
include_directories ("${PROJECT_SOURCE_DIR}/include")
|
||||||
|
include_directories ("${PROJECT_SOURCE_DIR}/libsrc")
|
||||||
include_directories ("${PROJECT_SOURCE_DIR}/libsrc/include")
|
include_directories ("${PROJECT_SOURCE_DIR}/libsrc/include")
|
||||||
include_directories ("${PROJECT_BINARY_DIR}")
|
include_directories ("${PROJECT_BINARY_DIR}")
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ namespace ngcore
|
|||||||
{
|
{
|
||||||
// PajeTrace *trace;
|
// PajeTrace *trace;
|
||||||
|
|
||||||
class alignas(64) NodeData //: public AlignedAlloc<NodeData>
|
class alignas(64) NodeData : public AlignedAlloc<NodeData>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
atomic<int> start_cnt{0};
|
atomic<int> start_cnt{0};
|
||||||
@ -390,7 +390,7 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
class alignas(4096) AtomicRange //: public AlignedAlloc<AtomicRange>
|
class alignas(4096) AtomicRange : public AlignedAlloc<AtomicRange>
|
||||||
{
|
{
|
||||||
atomic<size_t> begin;
|
atomic<size_t> begin;
|
||||||
atomic<size_t> end;
|
atomic<size_t> end;
|
||||||
|
@ -74,6 +74,35 @@ namespace ngcore
|
|||||||
b = std::move(temp);
|
b = std::move(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base class that supplies class-level operator new/delete which honor
// alignof(T). In this diff it is used as `class X : public AlignedAlloc<X>`
// for types declared with alignas(64) and alignas(4096).
template <typename T>
|
||||||
|
class AlignedAlloc
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
// Allocates s bytes: plain malloc when alignof(T) <= 16, otherwise
// _mm_malloc with alignof(T) passed as the requested alignment.
// NOTE(review): the returned pointer is passed back without a null check.
static void * aligned_malloc(size_t s)
|
||||||
|
{
|
||||||
|
// Assume 16 byte alignment of standard library
|
||||||
|
if(alignof(T)<=16)
|
||||||
|
return malloc(s);
|
||||||
|
else
|
||||||
|
return _mm_malloc(s, alignof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Releases memory obtained from aligned_malloc. Uses the same alignof(T)
// test, so free pairs with malloc and _mm_free pairs with _mm_malloc.
static void aligned_free(void *p)
|
||||||
|
{
|
||||||
|
if(alignof(T)<=16)
|
||||||
|
free(p);
|
||||||
|
else
|
||||||
|
_mm_free(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Placement form: performs no allocation and returns p unchanged.
void * operator new (size_t s, void *p) { return p; }
|
||||||
|
// Single-object allocation goes through aligned_malloc.
void * operator new (size_t s) { return aligned_malloc(s); }
|
||||||
|
// Array allocation goes through aligned_malloc as well.
void * operator new[] (size_t s) { return aligned_malloc(s); }
|
||||||
|
// Single-object deallocation goes through aligned_free.
void operator delete (void * p) { aligned_free(p); }
|
||||||
|
// Array deallocation goes through aligned_free as well.
void operator delete[] (void * p) { aligned_free(p); }
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace ngcore
|
} // namespace ngcore
|
||||||
|
|
||||||
#endif // NETGEN_CORE_UTILS_HPP
|
#endif // NETGEN_CORE_UTILS_HPP
|
||||||
|
@ -388,10 +388,10 @@ namespace netgen
|
|||||||
|
|
||||||
NgArray & operator= (NgArray && a2)
|
NgArray & operator= (NgArray && a2)
|
||||||
{
|
{
|
||||||
Swap (data, a2.data);
|
ngcore::Swap (data, a2.data);
|
||||||
Swap (size, a2.size);
|
ngcore::Swap (size, a2.size);
|
||||||
Swap (allocsize, a2.allocsize);
|
ngcore::Swap (allocsize, a2.allocsize);
|
||||||
Swap (ownmem, a2.ownmem);
|
ngcore::Swap (ownmem, a2.ownmem);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -745,8 +745,8 @@ namespace netgen
|
|||||||
|
|
||||||
if (i <= j)
|
if (i <= j)
|
||||||
{
|
{
|
||||||
Swap (data[i], data[j]);
|
ngcore::Swap (data[i], data[j]);
|
||||||
Swap (slave[i], slave[j]);
|
ngcore::Swap (slave[i], slave[j]);
|
||||||
i++; j--;
|
i++; j--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include <core/utils.hpp>
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#ifndef AVX_OPERATORS_DEFINED
|
#ifndef AVX_OPERATORS_DEFINED
|
||||||
#define AVX_OPERATORS_DEFINED
|
#define AVX_OPERATORS_DEFINED
|
||||||
@ -48,6 +50,7 @@ NG_INLINE __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; }
|
|||||||
|
|
||||||
namespace ngsimd
|
namespace ngsimd
|
||||||
{
|
{
|
||||||
|
using ngcore::AlignedAlloc;
|
||||||
|
|
||||||
// MSVC does not define SSE. It's always present on 64bit cpus
|
// MSVC does not define SSE. It's always present on 64bit cpus
|
||||||
#if (defined(_M_AMD64) || defined(_M_X64) || defined(__AVX__))
|
#if (defined(_M_AMD64) || defined(_M_X64) || defined(__AVX__))
|
||||||
@ -121,42 +124,6 @@ namespace ngsimd
|
|||||||
NG_INLINE SIMD<double,N> operator/ (SIMD<double,N> a, T b) { return a / SIMD<double,N>(b); }
|
NG_INLINE SIMD<double,N> operator/ (SIMD<double,N> a, T b) { return a / SIMD<double,N>(b); }
|
||||||
|
|
||||||
|
|
||||||
#ifdef __AVX__
|
|
||||||
template <typename T>
|
|
||||||
class AlignedAlloc
|
|
||||||
{
|
|
||||||
protected:
|
|
||||||
static void * aligned_malloc(size_t s)
|
|
||||||
{
|
|
||||||
// Assume 16 byte alignment of standard library
|
|
||||||
if(alignof(T)<=16)
|
|
||||||
return malloc(s);
|
|
||||||
else
|
|
||||||
return _mm_malloc(s, alignof(T));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void aligned_free(void *p)
|
|
||||||
{
|
|
||||||
if(alignof(T)<=16)
|
|
||||||
free(p);
|
|
||||||
else
|
|
||||||
_mm_free(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
void * operator new (size_t s, void *p) { return p; }
|
|
||||||
void * operator new (size_t s) { return aligned_malloc(s); }
|
|
||||||
void * operator new[] (size_t s) { return aligned_malloc(s); }
|
|
||||||
void operator delete (void * p) { aligned_free(p); }
|
|
||||||
void operator delete[] (void * p) { aligned_free(p); }
|
|
||||||
};
|
|
||||||
#else
|
|
||||||
// it's only a dummy without AVX
|
|
||||||
template <typename T>
|
|
||||||
class AlignedAlloc { ; };
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using std::sqrt;
|
using std::sqrt;
|
||||||
using std::fabs;
|
using std::fabs;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user