mirror of
https://github.com/NGSolve/netgen.git
synced 2024-12-26 05:50:32 +05:00
Merge branch 'cuda' into 'master'
Some CUDA fixes. See merge request ngsolve/netgen!556
This commit is contained in:
commit
33a76ad6ba
@ -87,18 +87,18 @@ namespace ngcore
|
|||||||
// Convenience macro to append file name and line of exception origin to the string
|
// Convenience macro to append file name and line of exception origin to the string
|
||||||
#define NG_EXCEPTION(s) ngcore::Exception(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t"+std::string(s))
|
#define NG_EXCEPTION(s) ngcore::Exception(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t"+std::string(s))
|
||||||
|
|
||||||
#ifdef NETGEN_ENABLE_CHECK_RANGE
|
#if defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
|
||||||
#define NETGEN_CHECK_RANGE(value, min, max_plus_one) \
|
#define NETGEN_CHECK_RANGE(value, min, max_plus_one) \
|
||||||
{ if ((value)<(min) || (value)>=(max_plus_one)) \
|
{ if ((value)<(min) || (value)>=(max_plus_one)) \
|
||||||
throw ngcore::RangeException(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t", (value), (min), (max_plus_one)); }
|
throw ngcore::RangeException(__FILE__ ":" NETGEN_CORE_NGEXEPTION_STR(__LINE__) "\t", (value), (min), (max_plus_one)); }
|
||||||
#define NETGEN_CHECK_SHAPE(a,b) \
|
#define NETGEN_CHECK_SHAPE(a,b) \
|
||||||
{ if(a.Shape() != b.Shape()) \
|
{ if(a.Shape() != b.Shape()) \
|
||||||
throw ngcore::Exception(__FILE__": shape don't match"); }
|
throw ngcore::Exception(__FILE__": shape don't match"); }
|
||||||
#else // NETGEN_ENABLE_CHECK_RANGE
|
#else // defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
|
||||||
#define NETGEN_CHECK_RANGE(value, min, max)
|
#define NETGEN_CHECK_RANGE(value, min, max)
|
||||||
#define NETGEN_CHECK_SHAPE(a,b)
|
#define NETGEN_CHECK_SHAPE(a,b)
|
||||||
|
|
||||||
#endif // NETGEN_ENABLE_CHECK_RANGE
|
#endif // defined(NETGEN_ENABLE_CHECK_RANGE) && !defined(__CUDA_ARCH__)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -136,7 +136,7 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
/// free memory
|
/// free memory
|
||||||
NETGEN_INLINE virtual ~LocalHeap ()
|
virtual ~LocalHeap ()
|
||||||
{
|
{
|
||||||
if (owner)
|
if (owner)
|
||||||
delete [] data;
|
delete [] data;
|
||||||
|
@ -48,20 +48,29 @@
|
|||||||
#define NGCORE_API NGCORE_API_IMPORT
|
#define NGCORE_API NGCORE_API_IMPORT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
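// NETGEN_HD expands to __host__ __device__ when compiling CUDA code (__CUDACC__ defined) and to nothing otherwise; it is appended to the inline macros in the __GNUC__ branch below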
|
||||||
|
#ifdef __CUDACC__
|
||||||
|
#define NETGEN_HD __host__ __device__
|
||||||
|
#else // __CUDACC__
|
||||||
|
#define NETGEN_HD
|
||||||
|
#endif // __CUDACC__
|
||||||
|
|
||||||
#ifdef __INTEL_COMPILER
|
#ifdef __INTEL_COMPILER
|
||||||
#ifdef WIN32
|
#define NETGEN_ALWAYS_INLINE __forceinline
|
||||||
#define NETGEN_INLINE __forceinline inline
|
#define NETGEN_INLINE __forceinline inline
|
||||||
|
#ifdef WIN32
|
||||||
#define NETGEN_LAMBDA_INLINE
|
#define NETGEN_LAMBDA_INLINE
|
||||||
#else
|
#else
|
||||||
#define NETGEN_INLINE __forceinline inline
|
|
||||||
#define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__))
|
#define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__))
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
#define NETGEN_INLINE __attribute__ ((__always_inline__)) inline
|
#define NETGEN_ALWAYS_INLINE __attribute__ ((__always_inline__))
|
||||||
#define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__))
|
#define NETGEN_INLINE __attribute__ ((__always_inline__)) inline NETGEN_HD
|
||||||
|
#define NETGEN_LAMBDA_INLINE __attribute__ ((__always_inline__)) NETGEN_HD
|
||||||
#define NETGEN_VLA
|
#define NETGEN_VLA
|
||||||
#else
|
#else
|
||||||
|
#define NETGEN_ALWAYS_INLINE
|
||||||
#define NETGEN_INLINE inline
|
#define NETGEN_INLINE inline
|
||||||
#define NETGEN_LAMBDA_INLINE
|
#define NETGEN_LAMBDA_INLINE
|
||||||
#endif
|
#endif
|
||||||
|
@ -11,6 +11,8 @@
|
|||||||
|
|
||||||
#include "simd_generic.hpp"
|
#include "simd_generic.hpp"
|
||||||
|
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
|
||||||
#ifdef NETGEN_ARCH_AMD64
|
#ifdef NETGEN_ARCH_AMD64
|
||||||
#ifndef __SSE__
|
#ifndef __SSE__
|
||||||
#define __SSE__
|
#define __SSE__
|
||||||
@ -30,8 +32,11 @@
|
|||||||
#include "simd_arm64.hpp"
|
#include "simd_arm64.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif // __CUDA_ARCH__
|
||||||
|
|
||||||
namespace ngcore
|
namespace ngcore
|
||||||
{
|
{
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
#ifdef NETGEN_ARCH_AMD64
|
#ifdef NETGEN_ARCH_AMD64
|
||||||
NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4)
|
NETGEN_INLINE auto HSum (SIMD<double,2> v1, SIMD<double,2> v2, SIMD<double,2> v3, SIMD<double,2> v4)
|
||||||
{
|
{
|
||||||
@ -45,6 +50,7 @@ namespace ngcore
|
|||||||
return SIMD<mask64>::GetMaskFromBits(i);
|
return SIMD<mask64>::GetMaskFromBits(i);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif // __CUDA_ARCH__
|
||||||
|
|
||||||
NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD <double,4> a3, SIMD<double,4> a4,
|
NETGEN_INLINE void SIMDTranspose (SIMD<double,4> a1, SIMD<double,4> a2, SIMD <double,4> a3, SIMD<double,4> a4,
|
||||||
SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4)
|
SIMD<double,4> & b1, SIMD<double,4> & b2, SIMD<double,4> & b3, SIMD<double,4> & b4)
|
||||||
|
Loading…
Reference in New Issue
Block a user