Merge branch 'cuda' into 'master'

Some cuda fixes

See merge request ngsolve/netgen!556
Hochsteger, Matthias 2023-02-16 12:55:12 +01:00
commit 33a76ad6ba
4 changed files with 23 additions and 8 deletions

File 1 of 4:

@@ -87,18 +87,18 @@ namespace ngcore
   // Convenience macro to append file name and line of exception origin to the string
   #define NG_EXCEPTION(s) ngcore::Exception(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t"+std::string(s))

-#ifdef NETGEN_ENABLE_CHECK_RANGE
+#if defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
 #define NETGEN_CHECK_RANGE(value, min, max_plus_one) \
   { if ((value)<(min) || (value)>=(max_plus_one)) \
       throw ngcore::RangeException(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t", (value), (min), (max_plus_one)); }
 #define NETGEN_CHECK_SHAPE(a,b) \
   { if(a.Shape() != b.Shape()) \
       throw ngcore::Exception(__FILE__": shape don't match"); }
-#else // NETGEN_ENABLE_CHECK_RANGE
+#else // defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
 #define NETGEN_CHECK_RANGE(value, min, max)
 #define NETGEN_CHECK_SHAPE(a,b)
-#endif // NETGEN_ENABLE_CHECK_RANGE
+#endif // defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
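Note: with the extra !defined(__CUDA_ARCH__) condition, the range and shape checks expand to nothing in device code, where throw is not available, while host builds keep the full checks. A minimal sketch of the intended effect on code that uses the macro; the GetEntry helper is hypothetical and not part of the commit, and NETGEN_HD is the macro introduced later in this commit:

  // Host compilation with NETGEN_ENABLE_CHECK_RANGE: the macro expands to a
  // bounds test that throws ngcore::RangeException on failure.
  // Device pass under nvcc (__CUDA_ARCH__ defined): the macro expands to nothing,
  // so no 'throw' ends up in device code.
  NETGEN_HD inline double GetEntry (const double * data, int n, int i)
  {
    NETGEN_CHECK_RANGE(i, 0, n);   // active on the host, a no-op under __CUDA_ARCH__
    return data[i];
  }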

File 2 of 4:

@@ -136,7 +136,7 @@ public:
   /// free memory
-  NETGEN_INLINE virtual ~LocalHeap ()
+  virtual ~LocalHeap ()
   {
     if (owner)
       delete [] data;

File 3 of 4:

@@ -48,20 +48,29 @@
 #define NGCORE_API NGCORE_API_IMPORT
 #endif

+// Set __host__ __device__ for all inline functions
+#ifdef __CUDACC__
+#define NETGEN_HD __host__ __device__
+#else // __CUDACC__
+#define NETGEN_HD
+#endif // __CUDACC__
+
 #ifdef __INTEL_COMPILER
+#define NETGEN_ALWAYS_INLINE __forceinline
+#define NETGEN_INLINE __forceinline inline
 #ifdef WIN32
-#define NETGEN_INLINE __forceinline inline
 #define NETGEN_LAMBDA_INLINE
 #else
-#define NETGEN_INLINE __forceinline inline
 #define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__))
 #endif
 #else
 #ifdef __GNUC__
-#define NETGEN_INLINE __attribute__ ((__always_inline__)) inline
-#define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__))
+#define NETGEN_ALWAYS_INLINE __attribute__ ((__always_inline__))
+#define NETGEN_INLINE __attribute__ ((__always_inline__)) inline NETGEN_HD
+#define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__)) NETGEN_HD
 #define NETGEN_VLA
 #else
+#define NETGEN_ALWAYS_INLINE
 #define NETGEN_INLINE inline
 #define NETGEN_LAMBDA_INLINE
 #endif
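Note: NETGEN_HD adds __host__ __device__ to NETGEN_INLINE and NETGEN_LAMBDA_INLINE only when the translation unit is compiled by nvcc, so the same inline helpers become callable from host and device code without separate CUDA variants. A hedged usage sketch; the Square helper and the kernel are illustrative, not part of the commit:

  // Under nvcc with a GCC host compiler, NETGEN_INLINE here expands to
  //   __attribute__ ((__always_inline__)) inline __host__ __device__
  // Under a plain host compiler NETGEN_HD is empty and nothing changes.
  NETGEN_INLINE double Square (double x) { return x*x; }

  __global__ void SquareKernel (double * data, int n)
  {
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (i < n)
      data[i] = Square(data[i]);        // callable from device code ...
  }

  double HostSquare (double x) { return Square(x); }   // ... and from host code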

File 4 of 4:

@@ -11,6 +11,8 @@
 #include "simd_generic.hpp"

+#ifndef __CUDA_ARCH__
+
 #ifdef NETGEN_ARCH_AMD64
 #ifndef __SSE__
 #define __SSE__

@@ -30,8 +32,11 @@
 #include "simd_arm64.hpp"
 #endif

+#endif // __CUDA_ARCH__
+
 namespace ngcore
 {
+#ifndef __CUDA_ARCH__
 #ifdef NETGEN_ARCH_AMD64
   NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4)
   {

@@ -45,6 +50,7 @@ namespace ngcore
     return SIMD<mask64>::GetMaskFromBits(i);
   }
 #endif
+#endif // __CUDA_ARCH__

   NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD<double,4> a3, SIMD<double,4> a4,
                                     SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4)
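Note: in this last file, the new __CUDA_ARCH__ guards keep the architecture-specific intrinsic headers and the hardware HSum / GetMaskFromBits overloads out of device compilation, so device code only ever sees the generic implementation from simd_generic.hpp. The same guard pattern, sketched on a hypothetical host-only helper that is not part of the commit:

  #ifndef __CUDA_ARCH__
  // Host-only: this block may rely on the SSE/AVX/NEON wrappers included above;
  // the device pass never sees it and falls back to the generic SIMD types.
  NETGEN_INLINE SIMD<double,4> ScaledSum (SIMD<double,4> a, SIMD<double,4> b)
  {
    return a + 2.0*b;   // uses whatever hardware SIMD<double,4> maps to on the host
  }
  #endif // __CUDA_ARCH__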