ngcore: let the core headers be compiled in CUDA translation units

libsrc/core/exception.hpp
    NETGEN_CHECK_RANGE and NETGEN_CHECK_SHAPE throw only when
    NETGEN_ENABLE_CHECK_RANGE is defined and __CUDA_ARCH__ is not defined;
    in every other configuration both macros expand to nothing.

libsrc/core/localheap.hpp
    The virtual destructor of LocalHeap no longer uses NETGEN_INLINE
    (presumably so that it does not pick up the NETGEN_HD qualifiers that
    NETGEN_INLINE now carries -- confirm with the author).

libsrc/core/ngcore_api.hpp
    Adds NETGEN_HD (__host__ __device__ when __CUDACC__ is defined, empty
    otherwise) and NETGEN_ALWAYS_INLINE.  NETGEN_INLINE is defined once in
    the __INTEL_COMPILER branch and ends with NETGEN_HD in every compiler
    branch, so the qualifiers do not depend on which host compiler is used.
    NETGEN_LAMBDA_INLINE carries NETGEN_HD only in the __GNUC__ branch.

libsrc/core/simd.hpp
    The architecture-specific section (from the NETGEN_ARCH_AMD64 block to
    the simd_arm64.hpp include) and the NETGEN_ARCH_AMD64-only helpers inside
    namespace ngcore are skipped while __CUDA_ARCH__ is defined.

NOTE(review): blank context lines and indentation were restored so that each
hunk agrees with its header counts; verify against the target tree before
applying.

diff --git a/libsrc/core/exception.hpp b/libsrc/core/exception.hpp
index 4a003179..32b02cb2 100644
--- a/libsrc/core/exception.hpp
+++ b/libsrc/core/exception.hpp
@@ -87,18 +87,18 @@ namespace ngcore
 // Convenience macro to append file name and line of exception origin to the string
 #define NG_EXCEPTION(s) ngcore::Exception(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t"+std::string(s))
 
-#ifdef NETGEN_ENABLE_CHECK_RANGE
+#if defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
 #define NETGEN_CHECK_RANGE(value, min, max_plus_one) \
   { if ((value)<(min) || (value)>=(max_plus_one)) \
       throw ngcore::RangeException(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t", (value), (min), (max_plus_one)); }
 
 #define NETGEN_CHECK_SHAPE(a,b) \
   { if(a.Shape() != b.Shape()) \
       throw ngcore::Exception(__FILE__": shape don't match"); }
-#else // NETGEN_ENABLE_CHECK_RANGE
+#else // defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
 #define NETGEN_CHECK_RANGE(value, min, max)
 #define NETGEN_CHECK_SHAPE(a,b)
-#endif // NETGEN_ENABLE_CHECK_RANGE
+#endif // defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
 
 
 
diff --git a/libsrc/core/localheap.hpp b/libsrc/core/localheap.hpp
index 3bfae3d6..74e80a5e 100644
--- a/libsrc/core/localheap.hpp
+++ b/libsrc/core/localheap.hpp
@@ -136,7 +136,7 @@ public:
 
 
   /// free memory
-  NETGEN_INLINE virtual ~LocalHeap ()
+  virtual ~LocalHeap ()
   {
     if (owner)
       delete [] data;
diff --git a/libsrc/core/ngcore_api.hpp b/libsrc/core/ngcore_api.hpp
index 9c977c1c..e66e9b87 100644
--- a/libsrc/core/ngcore_api.hpp
+++ b/libsrc/core/ngcore_api.hpp
@@ -48,20 +48,31 @@
   #define NGCORE_API NGCORE_API_IMPORT
 #endif
 
+// NETGEN_HD expands to __host__ __device__ when the translation unit is
+// compiled as CUDA (__CUDACC__ defined) and to nothing otherwise.  It is
+// appended to NETGEN_INLINE in every compiler branch below.
+#ifdef __CUDACC__
+  #define NETGEN_HD __host__ __device__
+#else // __CUDACC__
+  #define NETGEN_HD
+#endif // __CUDACC__
+
 #ifdef __INTEL_COMPILER
+  #define NETGEN_ALWAYS_INLINE __forceinline
+  #define NETGEN_INLINE __forceinline inline NETGEN_HD
   #ifdef WIN32
-    #define NETGEN_INLINE __forceinline inline
     #define NETGEN_LAMBDA_INLINE
   #else
-    #define NETGEN_INLINE __forceinline inline
     #define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__))
   #endif
 #else
   #ifdef __GNUC__
-    #define NETGEN_INLINE __attribute__ ((__always_inline__)) inline
-    #define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__))
+    #define NETGEN_ALWAYS_INLINE __attribute__ ((__always_inline__))
+    #define NETGEN_INLINE __attribute__ ((__always_inline__)) inline NETGEN_HD
+    #define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__)) NETGEN_HD
     #define NETGEN_VLA
   #else
-    #define NETGEN_INLINE inline
+    #define NETGEN_ALWAYS_INLINE
+    #define NETGEN_INLINE inline NETGEN_HD
     #define NETGEN_LAMBDA_INLINE
   #endif
diff --git a/libsrc/core/simd.hpp b/libsrc/core/simd.hpp
index 4a646dca..2d85a79b 100644
--- a/libsrc/core/simd.hpp
+++ b/libsrc/core/simd.hpp
@@ -11,6 +11,8 @@
 
 #include "simd_generic.hpp"
 
+#ifndef __CUDA_ARCH__
+
 #ifdef NETGEN_ARCH_AMD64
 #ifndef __SSE__
 #define __SSE__
@@ -30,8 +32,11 @@
 #include "simd_arm64.hpp"
 #endif
 
+#endif // __CUDA_ARCH__
+
 namespace ngcore
 {
+#ifndef __CUDA_ARCH__
 #ifdef NETGEN_ARCH_AMD64
   NETGEN_INLINE auto HSum (SIMD v1, SIMD v2, SIMD v3, SIMD v4)
   {
@@ -45,6 +50,7 @@ namespace ngcore
     return SIMD::GetMaskFromBits(i);
   }
 #endif
+#endif // __CUDA_ARCH__
 
   NETGEN_INLINE void SIMDTranspose (SIMD a1, SIMD a2, SIMD a3, SIMD a4,
                                     SIMD & b1, SIMD & b2, SIMD & b3, SIMD & b4)