Add support for AVX512

Restructure mysimd.hpp and add support for AVX512. Changes include

  - Move mysimd.hpp to ngsimd.hpp
  - Expose ngsimd.hpp to NGSolve
  - New namespace ngsimd
  - Second template parameter (width) for SIMD class, default to the
    largest width available
  - Avoid raw avx register types in the interface, use SIMD<> instead
This commit is contained in:
Matthias Hochsteger 2017-04-19 18:02:27 +02:00
parent 70da438d6d
commit 643c89538d
9 changed files with 597 additions and 456 deletions

View File

@ -13,7 +13,7 @@ install( FILES ngexception.hpp DESTINATION ${INCDIR} COMPONENT netgen_devel )
install(FILES install(FILES
archive_base.hpp array.hpp autodiff.hpp autoptr.hpp bitarray.hpp archive_base.hpp array.hpp autodiff.hpp autoptr.hpp bitarray.hpp
dynamicmem.hpp flags.hpp hashtabl.hpp mpi_interface.hpp myadt.hpp dynamicmem.hpp flags.hpp hashtabl.hpp mpi_interface.hpp myadt.hpp
mysimd.hpp mystring.hpp netgenout.hpp ngexception.hpp ngpython.hpp ngsimd.hpp mystring.hpp netgenout.hpp ngexception.hpp ngpython.hpp
optmem.hpp parthreads.hpp profiler.hpp seti.hpp sort.hpp optmem.hpp parthreads.hpp profiler.hpp seti.hpp sort.hpp
spbita2d.hpp stack.hpp symbolta.hpp table.hpp template.hpp spbita2d.hpp stack.hpp symbolta.hpp table.hpp template.hpp
gzstream.h gzstream.h

View File

@ -46,7 +46,7 @@
#include "gzstream.h" #include "gzstream.h"
#include "archive_base.hpp" #include "archive_base.hpp"
#include "mysimd.hpp" #include "ngsimd.hpp"
#endif #endif

View File

@ -1,403 +0,0 @@
#ifndef FILE_MYSIMD
#define FILE_MYSIMD
/**************************************************************************/
/* File: mysimd.hpp */
/* Author: Joachim Schoeberl */
/* Date: 25. Mar. 16 */
/**************************************************************************/
#include <immintrin.h>
#ifdef WIN32
#ifndef AVX_OPERATORS_DEFINED
#define AVX_OPERATORS_DEFINED
inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); }
inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); }
inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); }
inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); }
inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); }
inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; }
inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; }
inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; }
inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; }
inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; }
inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; }
inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); }
inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); }
inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); }
inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); }
inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); }
inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; }
inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; }
inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; }
inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; }
inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; }
inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; }
#endif
#endif
namespace netgen
{
template <typename T> class SIMD;
template <typename T>
struct has_call_operator
{
template <typename C> static std::true_type check( decltype( sizeof(&C::operator() )) ) { return std::true_type(); }
template <typename> static std::false_type check(...) { return std::false_type(); }
typedef decltype( check<T>(sizeof(char)) ) type;
static constexpr type value = type();
};
#ifdef __AVX__
template <typename T>
class AlignedAlloc
{
protected:
static void * aligned_malloc(size_t s)
{
// Assume 16 byte alignment of standard library
if(alignof(T)<=16)
return malloc(s);
else
return _mm_malloc(s, alignof(T));
}
static void aligned_free(void *p)
{
if(alignof(T)<=16)
free(p);
else
_mm_free(p);
}
public:
void * operator new (size_t s, void *p) { return p; }
void * operator new (size_t s) { return aligned_malloc(s); }
void * operator new[] (size_t s) { return aligned_malloc(s); }
void operator delete (void * p) { aligned_free(p); }
void operator delete[] (void * p) { aligned_free(p); }
};
template<>
class alignas(32) SIMD<double> : public AlignedAlloc<SIMD<double>>
{
__m256d data;
public:
static constexpr int Size() { return 4; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (double val)
{
data = _mm256_set1_pd(val);
}
SIMD (__m256d adata)
: data(adata)
{ ; }
/*
template <typename T>
SIMD (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
}
*/
/*
template <typename T>
SIMD & operator= (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
return *this;
}
*/
template <typename Function>
void SIMD_function (const Function & func, std::true_type)
{
data = _mm256_set_pd(func(3), func(2), func(1), func(0));
}
// not a function
void SIMD_function (double const * p, std::false_type)
{
data = _mm256_loadu_pd(p);
}
void SIMD_function (double val, std::false_type)
{
data = _mm256_set1_pd(val);
}
void SIMD_function (__m256d _data, std::false_type)
{
data = _data;
}
inline double operator[] (int i) const { return ((double*)(&data))[i]; }
inline __m256d Data() const { return data; }
inline __m256d & Data() { return data; }
};
inline SIMD<double> operator+ (SIMD<double> a, SIMD<double> b) { return a.Data()+b.Data(); }
inline SIMD<double> operator- (SIMD<double> a, SIMD<double> b) { return a.Data()-b.Data(); }
inline SIMD<double> operator- (SIMD<double> a) { return -a.Data(); }
inline SIMD<double> operator* (SIMD<double> a, SIMD<double> b) { return a.Data()*b.Data(); }
inline SIMD<double> operator/ (SIMD<double> a, SIMD<double> b) { return a.Data()/b.Data(); }
inline SIMD<double> operator* (double a, SIMD<double> b) { return SIMD<double>(a)*b; }
inline SIMD<double> operator* (SIMD<double> b, double a) { return SIMD<double>(a)*b; }
inline SIMD<double> operator+= (SIMD<double> & a, SIMD<double> b) { return a.Data()+=b.Data(); }
inline SIMD<double> operator-= (SIMD<double> & a, SIMD<double> b) { return a.Data()-=b.Data(); }
inline SIMD<double> operator*= (SIMD<double> & a, SIMD<double> b) { return a.Data()*=b.Data(); }
inline SIMD<double> operator/= (SIMD<double> & a, SIMD<double> b) { return a.Data()/=b.Data(); }
using std::sqrt;
using std::fabs;
inline SIMD<double> sqrt (SIMD<double> a) { return _mm256_sqrt_pd(a.Data()); }
inline SIMD<double> fabs (SIMD<double> a) { return _mm256_max_pd(a.Data(), -a.Data()); }
inline SIMD<double> L2Norm2 (SIMD<double> a) { return a.Data()*a.Data(); }
inline SIMD<double> Trans (SIMD<double> a) { return a; }
inline SIMD<double> IfPos (SIMD<double> a, SIMD<double> b, SIMD<double> c)
{
auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS);
return _mm256_blendv_pd(c.Data(), b.Data(), cp);
}
inline double HSum (SIMD<double> sd)
{
__m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1));
return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv));
}
#else
// it's only a dummy without AVX
template <typename T>
class AlignedAlloc { ; };
template<>
class SIMD<double>
{
double data;
public:
static constexpr int Size() { return 1; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (double val)
: data(val) { ; }
/*
template <typename T>
SIMD (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
}
*/
template <typename T>
SIMD & operator= (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
return *this;
}
template <typename Function>
void SIMD_function (const Function & func, std::true_type)
{
data = func(0);
}
// not a function
void SIMD_function (double const * p, std::false_type)
{
data = *p;
}
void SIMD_function (double val, std::false_type)
{
data = val;
}
double operator[] (int i) const { return ((double*)(&data))[i]; }
double Data() const { return data; }
double & Data() { return data; }
};
inline SIMD<double> operator+ (SIMD<double> a, SIMD<double> b) { return a.Data()+b.Data(); }
inline SIMD<double> operator- (SIMD<double> a, SIMD<double> b) { return a.Data()-b.Data(); }
inline SIMD<double> operator- (SIMD<double> a) { return -a.Data(); }
inline SIMD<double> operator* (SIMD<double> a, SIMD<double> b) { return a.Data()*b.Data(); }
inline SIMD<double> operator/ (SIMD<double> a, SIMD<double> b) { return a.Data()/b.Data(); }
inline SIMD<double> operator* (double a, SIMD<double> b) { return SIMD<double>(a)*b; }
inline SIMD<double> operator* (SIMD<double> b, double a) { return SIMD<double>(a)*b; }
inline SIMD<double> operator+= (SIMD<double> & a, SIMD<double> b) { return a.Data()+=b.Data(); }
inline SIMD<double> operator-= (SIMD<double> & a, SIMD<double> b) { return a.Data()-=b.Data(); }
inline SIMD<double> operator*= (SIMD<double> & a, SIMD<double> b) { return a.Data()*=b.Data(); }
inline SIMD<double> operator/= (SIMD<double> & a, SIMD<double> b) { return a.Data()/=b.Data(); }
using std::sqrt;
using std::fabs;
inline SIMD<double> sqrt (SIMD<double> a) { return std::sqrt(a.Data()); }
inline SIMD<double> fabs (SIMD<double> a) { return std::fabs(a.Data()); }
inline SIMD<double> L2Norm2 (SIMD<double> a) { return a.Data()*a.Data(); }
inline SIMD<double> Trans (SIMD<double> a) { return a; }
inline SIMD<double> IfPos (SIMD<double> a, SIMD<double> b, SIMD<double> c)
{
return (a.Data() > 0) ? b : c;
}
inline double HSum (SIMD<double> sd)
{ return sd.Data(); }
#endif
template <typename T>
ostream & operator<< (ostream & ost, SIMD<T> simd)
{
ost << simd[0];
for (int i = 1; i < simd.Size(); i++)
ost << " " << simd[i];
return ost;
}
/*
using std::exp;
inline netgen::SIMD<double> exp (netgen::SIMD<double> a) {
return netgen::SIMD<double>([&](int i)->double { return exp(a[i]); } );
}
using std::log;
inline netgen::SIMD<double> log (netgen::SIMD<double> a) {
return netgen::SIMD<double>([&](int i)->double { return log(a[i]); } );
}
using std::pow;
inline netgen::SIMD<double> pow (netgen::SIMD<double> a, double x) {
return netgen::SIMD<double>([&](int i)->double { return pow(a[i],x); } );
}
*/
template <int D, typename T>
class MultiSIMD
{
SIMD<T> head;
MultiSIMD<D-1,T> tail;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : head(v), tail(v) { ; }
MultiSIMD (SIMD<T> _head, MultiSIMD<D-1,T> _tail)
: head(_head), tail(_tail) { ; }
SIMD<T> Head() const { return head; }
MultiSIMD<D-1,T> Tail() const { return tail; }
SIMD<T> & Head() { return head; }
MultiSIMD<D-1,T> & Tail() { return tail; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? head : tail.template Get<NR-1>(); }
template <int NR>
SIMD<T> & Get() { return NR==0 ? head : tail.template Get<NR-1>(); }
};
template <typename T>
class MultiSIMD<2,T>
{
SIMD<T> v0, v1;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : v0(v), v1(v) { ; }
MultiSIMD (SIMD<T> _v0, SIMD<T> _v1) : v0(_v0), v1(_v1) { ; }
SIMD<T> Head() const { return v0; }
SIMD<T> Tail() const { return v1; }
SIMD<T> & Head() { return v0; }
SIMD<T> & Tail() { return v1; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? v0 : v1; }
template <int NR>
SIMD<T> & Get() { return NR==0 ? v0 : v1; }
};
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()+b.Head(), a.Tail()+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()-b.Head(), a.Tail()-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a-b.Head(), a-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (b.Head()-a, b.Tail()-a); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a)
{ return MultiSIMD<D,double> (-a.Head(), -a.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()*b.Head(), a.Tail()*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator/ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()/b.Head(), a.Tail()/b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> & operator+= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, double b)
{ a.Head()-=b; a.Tail()-=b; return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, double b)
{ a.Head()*=b; a.Tail()*=b; return a; }
// inline MultiSIMD<double> operator/= (MultiSIMD<double> & a, MultiSIMD<double> b) { return a.Data()/=b.Data(); }
template <int D, typename T>
ostream & operator<< (ostream & ost, MultiSIMD<D,T> multi)
{
ost << multi.Head() << " " << multi.Tail();
return ost;
}
}
#endif

553
libsrc/general/ngsimd.hpp Normal file
View File

@ -0,0 +1,553 @@
#ifndef FILE_NGSIMD
#define FILE_NGSIMD
/**************************************************************************/
/* File: ngsimd.hpp */
/* Author: Joachim Schoeberl */
/* Date: 25. Mar. 16 */
/**************************************************************************/
#include <immintrin.h>
#include <tuple>
#include <ostream>
#include <stdexcept>
#include <string>
#include <type_traits>
#ifdef WIN32
#ifndef AVX_OPERATORS_DEFINED
#define AVX_OPERATORS_DEFINED
inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); }
inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); }
inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); }
inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); }
inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); }
inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; }
inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; }
inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; }
inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; }
inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; }
inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; }
inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); }
inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); }
inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); }
inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); }
inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); }
inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; }
inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; }
inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; }
inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; }
inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; }
inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; }
#endif
#endif
namespace ngsimd
{
constexpr int GetDefaultSIMDSize() {
#if defined __AVX512F__
return 8;
#elif defined __AVX__
return 4;
#else
return 1;
#endif
}
template <typename T, int N=GetDefaultSIMDSize()> class SIMD;
template <typename T>
struct has_call_operator
{
template <typename C> static std::true_type check( decltype( sizeof(&C::operator() )) ) { return std::true_type(); }
template <typename> static std::false_type check(...) { return std::false_type(); }
typedef decltype( check<T>(sizeof(char)) ) type;
static constexpr type value = type();
};
template <typename T1, typename T2, typename T3>
// a*b+c
inline auto FMA(T1 a, T2 b, T3 c)
{
return a*b+c;
}
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator+ (T a, SIMD<double,N> b) { return SIMD<double,N>(a) + b; }
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator- (T a, SIMD<double,N> b) { return SIMD<double,N>(a) - b; }
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator* (T a, SIMD<double,N> b) { return SIMD<double,N>(a) * b; }
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator/ (T a, SIMD<double,N> b) { return SIMD<double,N>(a) / b; }
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator+ (SIMD<double,N> a, T b) { return a + SIMD<double,N>(b); }
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator- (SIMD<double,N> a, T b) { return a - SIMD<double,N>(b); }
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator* (SIMD<double,N> a, T b) { return a * SIMD<double,N>(b); }
template<int N, typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
inline SIMD<double,N> operator/ (SIMD<double,N> a, T b) { return a / SIMD<double,N>(b); }
#ifdef __AVX__
template <typename T>
class AlignedAlloc
{
protected:
static void * aligned_malloc(size_t s)
{
// Assume 16 byte alignment of standard library
if(alignof(T)<=16)
return malloc(s);
else
return _mm_malloc(s, alignof(T));
}
static void aligned_free(void *p)
{
if(alignof(T)<=16)
free(p);
else
_mm_free(p);
}
public:
void * operator new (size_t s, void *p) { return p; }
void * operator new (size_t s) { return aligned_malloc(s); }
void * operator new[] (size_t s) { return aligned_malloc(s); }
void operator delete (void * p) { aligned_free(p); }
void operator delete[] (void * p) { aligned_free(p); }
};
#else
// it's only a dummy without AVX
template <typename T>
class AlignedAlloc { ; };
#endif
using std::sqrt;
using std::fabs;
class ExceptionNOSIMD : public std::runtime_error
{
public:
using std::runtime_error::runtime_error;
std::string What() { return what(); }
};
using std::exp;
template<int N> inline SIMD<double,N> exp (SIMD<double,N> a)
{
return SIMD<double,N>([&](int i)->double { return exp(a[i]); } );
}
using std::log;
template<int N> inline SIMD<double,N> log (SIMD<double,N> a)
{
return SIMD<double,N>([&](int i)->double { return log(a[i]); } );
}
using std::pow;
template<int N> inline SIMD<double,N> pow (SIMD<double,N> a, double x)
{
return SIMD<double,N>([&](int i)->double { return pow(a[i],x); } );
}
using std::sin;
template<int N> inline SIMD<double,N> sin (SIMD<double,N> a)
{
return SIMD<double,N>([&](int i)->double { return sin(a[i]); } );
}
using std::cos;
template<int N> inline SIMD<double,N> cos (SIMD<double,N> a)
{
return SIMD<double,N>([&](int i)->double { return cos(a[i]); } );
}
using std::tan;
template<int N> inline SIMD<double,N> tan (SIMD<double,N> a)
{
return SIMD<double,N>([&](int i)->double { return tan(a[i]); } );
}
using std::atan;
template<int N> inline SIMD<double,N> atan (SIMD<double,N> a)
{
return SIMD<double,N>([&](int i)->double { return atan(a[i]); } );
}
/////////////////////////////////////////////////////////////////////////////
// SIMD width 1 (in case no AVX support is available)
/////////////////////////////////////////////////////////////////////////////
template<>
class SIMD<double,1>
{
double data;
public:
static constexpr int Size() { return 1; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
// only called if T has a call operator of appropriate type
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0>
SIMD (const T & func)
{
data = func(0);
}
// only called if T is arithmetic (integral or floating point types)
template<typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
SIMD (const T & val)
{
data = val;
}
SIMD (double const * p)
{
data = *p;
}
inline operator double() const { return data; }
inline double operator[] (int i) const { return ((double*)(&data))[i]; }
inline double Data() const { return data; }
inline double & Data() { return data; }
inline SIMD<double,1> &operator+= (SIMD<double,1> b) { data+=b.Data(); return *this; }
inline SIMD<double,1> &operator-= (SIMD<double,1> b) { data-=b.Data(); return *this; }
inline SIMD<double,1> &operator*= (SIMD<double,1> b) { data*=b.Data(); return *this; }
inline SIMD<double,1> &operator/= (SIMD<double,1> b) { data/=b.Data(); return *this; }
};
inline SIMD<double,1> operator+ (SIMD<double,1> a, SIMD<double,1> b) { return a.Data()+b.Data(); }
inline SIMD<double,1> operator- (SIMD<double,1> a, SIMD<double,1> b) { return a.Data()-b.Data(); }
inline SIMD<double,1> operator- (SIMD<double,1> a) { return -a.Data(); }
inline SIMD<double,1> operator* (SIMD<double,1> a, SIMD<double,1> b) { return a.Data()*b.Data(); }
inline SIMD<double,1> operator/ (SIMD<double,1> a, SIMD<double,1> b) { return a.Data()/b.Data(); }
inline SIMD<double,1> sqrt (SIMD<double,1> a) { return std::sqrt(a.Data()); }
inline SIMD<double,1> fabs (SIMD<double,1> a) { return std::fabs(a.Data()); }
inline SIMD<double,1> L2Norm2 (SIMD<double,1> a) { return a.Data()*a.Data(); }
inline SIMD<double,1> Trans (SIMD<double,1> a) { return a; }
inline SIMD<double,1> IfPos (SIMD<double,1> a, SIMD<double,1> b, SIMD<double,1> c)
{
return (a.Data() > 0) ? b : c;
}
inline double HSum (SIMD<double,1> sd)
{
return sd.Data();
}
/////////////////////////////////////////////////////////////////////////////
// AVX - Simd width 4
/////////////////////////////////////////////////////////////////////////////
#ifdef __AVX__
template<>
class alignas(32) SIMD<double,4> : public AlignedAlloc<SIMD<double,4>>
{
__m256d data;
public:
static constexpr int Size() { return 4; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (__m256d adata)
: data(adata)
{ ; }
// only called if T has a call operator of appropriate type
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0>
SIMD (const T & func)
{
data = _mm256_set_pd(func(3), func(2), func(1), func(0));
}
// only called if T is arithmetic (integral or floating point types)
template<typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
SIMD (const T & val)
{
data = _mm256_set1_pd(val);
}
SIMD (double const * p)
{
data = _mm256_loadu_pd(p);
}
inline operator __m256d() const { return data; }
inline double operator[] (int i) const { return ((double*)(&data))[i]; }
inline __m256d Data() const { return data; }
inline __m256d & Data() { return data; }
inline SIMD<double,4> &operator+= (SIMD<double,4> b) { data+=b.Data(); return *this; }
inline SIMD<double,4> &operator-= (SIMD<double,4> b) { data-=b.Data(); return *this; }
inline SIMD<double,4> &operator*= (SIMD<double,4> b) { data*=b.Data(); return *this; }
inline SIMD<double,4> &operator/= (SIMD<double,4> b) { data/=b.Data(); return *this; }
};
inline SIMD<double,4> operator+ (SIMD<double,4> a, SIMD<double,4> b) { return a.Data()+b.Data(); }
inline SIMD<double,4> operator- (SIMD<double,4> a, SIMD<double,4> b) { return a.Data()-b.Data(); }
inline SIMD<double,4> operator- (SIMD<double,4> a) { return -a.Data(); }
inline SIMD<double,4> operator* (SIMD<double,4> a, SIMD<double,4> b) { return a.Data()*b.Data(); }
inline SIMD<double,4> operator/ (SIMD<double,4> a, SIMD<double,4> b) { return a.Data()/b.Data(); }
inline SIMD<double,4> sqrt (SIMD<double,4> a) { return _mm256_sqrt_pd(a.Data()); }
inline SIMD<double,4> fabs (SIMD<double,4> a) { return _mm256_max_pd(a.Data(), -a.Data()); }
inline SIMD<double,4> L2Norm2 (SIMD<double,4> a) { return a.Data()*a.Data(); }
inline SIMD<double,4> Trans (SIMD<double,4> a) { return a; }
inline SIMD<double,4> IfPos (SIMD<double,4> a, SIMD<double,4> b, SIMD<double,4> c)
{
auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS);
return _mm256_blendv_pd(c.Data(), b.Data(), cp);
}
inline double HSum (SIMD<double,4> sd)
{
__m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1));
return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv));
}
inline auto HSum (SIMD<double,4> sd1, SIMD<double,4> sd2)
{
__m256d hv = _mm256_hadd_pd(sd1.Data(), sd2.Data());
__m128d hv2 = _mm_add_pd (_mm256_extractf128_pd(hv,0), _mm256_extractf128_pd(hv,1));
return std::make_tuple(_mm_cvtsd_f64 (hv2), _mm_cvtsd_f64(_mm_shuffle_pd (hv2, hv2, 3)));
}
inline SIMD<double,4> HSum (SIMD<double,4> v1, SIMD<double,4> v2, SIMD<double,4> v3, SIMD<double,4> v4)
{
__m256d hsum1 = _mm256_hadd_pd (v1.Data(), v2.Data());
__m256d hsum2 = _mm256_hadd_pd (v3.Data(), v4.Data());
__m256d hsum = _mm256_add_pd (_mm256_permute2f128_pd (hsum1, hsum2, 1+2*16),
_mm256_blend_pd (hsum1, hsum2, 12));
return hsum;
}
#endif // __AVX__
/////////////////////////////////////////////////////////////////////////////
// AVX512 - Simd width 8
/////////////////////////////////////////////////////////////////////////////
#ifdef __AVX512F__
template<>
class alignas(64) SIMD<double,8> : public AlignedAlloc<SIMD<double,8>>
{
__m512d data;
public:
static constexpr int Size() { return 8; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (__m512d adata)
: data(adata)
{ ; }
// only called if T has a call operator of appropriate type
template<typename T, typename std::enable_if<std::is_convertible<T, std::function<double(int)>>::value, int>::type = 0>
SIMD (const T & func)
{
data = _mm512_set_pd(func(7), func(6), func(5), func(4),
func(3), func(2), func(1), func(0));
}
// only called if T is arithmetic (integral or floating point types)
template<typename T, typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
SIMD (const T & val)
{
data = _mm512_set1_pd(val);
}
SIMD (double const * p)
{
data = _mm512_loadu_pd(p);
}
inline operator __m512d() const { return data; }
inline double operator[] (int i) const { return ((double*)(&data))[i]; }
inline __m512d Data() const { return data; }
inline __m512d & Data() { return data; }
inline SIMD<double,8> &operator+= (SIMD<double,8> b) { data+=b.Data(); return *this; }
inline SIMD<double,8> &operator-= (SIMD<double,8> b) { data-=b.Data(); return *this; }
inline SIMD<double,8> &operator*= (SIMD<double,8> b) { data*=b.Data(); return *this; }
inline SIMD<double,8> &operator/= (SIMD<double,8> b) { data/=b.Data(); return *this; }
};
inline SIMD<double,8> operator- (SIMD<double,8> a) { return _mm512_sub_pd(_mm512_setzero_pd(), a.Data()); }
inline SIMD<double,8> operator+ (SIMD<double,8> a, SIMD<double,8> b) { return _mm512_add_pd(a.Data(),b.Data()); }
inline SIMD<double,8> operator- (SIMD<double,8> a, SIMD<double,8> b) { return _mm512_sub_pd(a.Data(),b.Data()); }
inline SIMD<double,8> operator* (SIMD<double,8> a, SIMD<double,8> b) { return _mm512_mul_pd(a.Data(),b.Data()); }
inline SIMD<double,8> operator/ (SIMD<double,8> a, SIMD<double,8> b) { return _mm512_div_pd(a.Data(),b.Data()); }
inline SIMD<double,8> sqrt (SIMD<double,8> a) { return _mm512_sqrt_pd(a.Data()); }
inline SIMD<double,8> fabs (SIMD<double,8> a) { return _mm512_max_pd(a.Data(), -a.Data()); }
inline SIMD<double,8> L2Norm2 (SIMD<double,8> a) { return a.Data()*a.Data(); }
inline SIMD<double,8> Trans (SIMD<double,8> a) { return a; }
inline SIMD<double,8> IfPos (SIMD<double,8> a, SIMD<double,8> b, SIMD<double,8> c)
{
auto cp = _mm512_cmp_pd_mask (a.Data(), _mm512_setzero_pd(), _MM_CMPINT_GT);
return _mm512_mask_blend_pd(cp, c.Data(), b.Data());
}
template<> inline auto FMA (SIMD<double,8> a, SIMD<double,8> b, SIMD<double,8> c)
{
return _mm512_fmadd_pd (a.Data(), b.Data(), c.Data());
}
inline double HSum (SIMD<double,8> sd)
{
SIMD<double,4> low = _mm512_extractf64x4_pd(sd.Data(),0);
SIMD<double,4> high = _mm512_extractf64x4_pd(sd.Data(),1);
return HSum(low)+HSum(high);
}
inline auto HSum (SIMD<double,8> sd1, SIMD<double,8> sd2)
{
return std::make_tuple(HSum(sd1), HSum(sd2));
}
inline SIMD<double,4> HSum (SIMD<double,8> v1, SIMD<double,8> v2, SIMD<double,8> v3, SIMD<double,8> v4)
{
SIMD<double,4> high1 = _mm512_extractf64x4_pd(v1.Data(),1);
SIMD<double,4> high2 = _mm512_extractf64x4_pd(v2.Data(),1);
SIMD<double,4> high3 = _mm512_extractf64x4_pd(v3.Data(),1);
SIMD<double,4> high4 = _mm512_extractf64x4_pd(v4.Data(),1);
SIMD<double,4> low1 = _mm512_extractf64x4_pd(v1.Data(),0);
SIMD<double,4> low2 = _mm512_extractf64x4_pd(v2.Data(),0);
SIMD<double,4> low3 = _mm512_extractf64x4_pd(v3.Data(),0);
SIMD<double,4> low4 = _mm512_extractf64x4_pd(v4.Data(),0);
return HSum(low1,low2,low3,low4) + HSum(high1,high2,high3,high4);
}
#endif // __AVX512F__
////////////////////////////////////////////////////////////////////////////////
// MultiSIMD - Multiple SIMD values in one struct using head-tail implementation
////////////////////////////////////////////////////////////////////////////////
template <int D, typename T>
class MultiSIMD : public AlignedAlloc<MultiSIMD<D,T>>
{
SIMD<T> head;
MultiSIMD<D-1,T> tail;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : head(v), tail(v) { ; }
MultiSIMD (SIMD<T> _head, MultiSIMD<D-1,T> _tail)
: head(_head), tail(_tail) { ; }
SIMD<T> Head() const { return head; }
MultiSIMD<D-1,T> Tail() const { return tail; }
SIMD<T> & Head() { return head; }
MultiSIMD<D-1,T> & Tail() { return tail; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? head : tail.template Get<NR-1>(); }
template <int NR>
SIMD<T> & Get() { return NR==0 ? head : tail.template Get<NR-1>(); }
};
template <typename T>
class MultiSIMD<2,T> : public AlignedAlloc<MultiSIMD<2,T>>
{
SIMD<T> v0, v1;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : v0(v), v1(v) { ; }
MultiSIMD (SIMD<T> _v0, SIMD<T> _v1) : v0(_v0), v1(_v1) { ; }
SIMD<T> Head() const { return v0; }
SIMD<T> Tail() const { return v1; }
SIMD<T> & Head() { return v0; }
SIMD<T> & Tail() { return v1; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? v0 : v1; }
template <int NR>
SIMD<T> & Get() { return NR==0 ? v0 : v1; }
};
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()+b.Head(), a.Tail()+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()-b.Head(), a.Tail()-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a-b.Head(), a-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (b.Head()-a, b.Tail()-a); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a)
{ return MultiSIMD<D,double> (-a.Head(), -a.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()*b.Head(), a.Tail()*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator/ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()/b.Head(), a.Tail()/b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> & operator+= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, double b)
{ a.Head()-=b; a.Tail()-=b; return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, double b)
{ a.Head()*=b; a.Tail()*=b; return a; }
// inline MultiSIMD<double> operator/= (MultiSIMD<double> & a, MultiSIMD<double> b) { return a.Data()/=b.Data(); }
inline SIMD<double> HVSum (SIMD<double> a) { return a; }
template <int D>
inline SIMD<double> HVSum (MultiSIMD<D,double> a) { return a.Head() + HVSum(a.Tail()); }
template <int D> inline double HSum (MultiSIMD<D,double> a) { return HSum(HVSum(a)); }
template <int D> inline auto HSum (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return HSum(HVSum(a), HVSum(b)); }
template <int D, typename T>
std::ostream & operator<< (std::ostream & ost, MultiSIMD<D,T> multi)
{
ost << multi.Head() << " " << multi.Tail();
return ost;
}
template <typename T>
std::ostream & operator<< (std::ostream & ost, SIMD<T> simd)
{
ost << simd[0];
for (int i = 1; i < simd.Size(); i++)
ost << " " << simd[i];
return ost;
}
}
namespace netgen
{
using namespace ngsimd;
}
#endif

View File

@ -3,7 +3,7 @@ install(FILES nginterface.h nginterface_v2.hpp DESTINATION ${INCDIR} COMPONENT n
install(FILES install(FILES
acisgeom.hpp csg.hpp geometry2d.hpp gprim.hpp incopengl.hpp acisgeom.hpp csg.hpp geometry2d.hpp gprim.hpp incopengl.hpp
inctcl.hpp incvis.hpp linalg.hpp meshing.hpp myadt.hpp mydefs.hpp inctcl.hpp incvis.hpp linalg.hpp meshing.hpp myadt.hpp mydefs.hpp
mystdlib.h nginterface_v2_impl.hpp occgeom.hpp mystdlib.h nginterface_v2_impl.hpp occgeom.hpp ngsimd.hpp
opti.hpp parallel.hpp parallelinterface.hpp stlgeom.hpp visual.hpp opti.hpp parallel.hpp parallelinterface.hpp stlgeom.hpp visual.hpp
DESTINATION ${INCDIR}/include COMPONENT netgen_devel DESTINATION ${INCDIR}/include COMPONENT netgen_devel
) )

View File

@ -0,0 +1 @@
#include <../general/ngsimd.hpp>

View File

@ -646,29 +646,26 @@ namespace netgen
#ifdef __AVX__
#include <immintrin.h>
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<1,1> (int elnr, int npts, MultiElementTransformation<1,1> (int elnr, int npts,
const __m256d * xi, size_t sxi, const SIMD<double> * xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
cout << "multi-eltrafo simd called, 1,1,simd" << endl; cout << "multi-eltrafo simd called, 1,1,simd" << endl;
} }
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<2,2> (int elnr, int npts, MultiElementTransformation<2,2> (int elnr, int npts,
const __m256d * xi, size_t sxi, const SIMD<double> * xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<2> mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<2>
(elnr, npts, (elnr, npts,
reinterpret_cast<const SIMD<double>*> (xi), sxi, xi, sxi,
reinterpret_cast<SIMD<double>*> (x), sx, x, sx,
reinterpret_cast<SIMD<double>*> (dxdxi), sdxdxi); dxdxi, sdxdxi);
/* /*
for (int i = 0; i < npts; i++) for (int i = 0; i < npts; i++)
{ {
@ -695,15 +692,15 @@ namespace netgen
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<3,3> (int elnr, int npts, MultiElementTransformation<3,3> (int elnr, int npts,
const __m256d * xi, size_t sxi, const SIMD<double> * xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
mesh->GetCurvedElements().CalcMultiPointElementTransformation mesh->GetCurvedElements().CalcMultiPointElementTransformation
(elnr, npts, (elnr, npts,
reinterpret_cast<const SIMD<double>*> (xi), sxi, xi, sxi,
reinterpret_cast<SIMD<double>*> (x), sx, x, sx,
reinterpret_cast<SIMD<double>*> (dxdxi), sdxdxi); dxdxi, sdxdxi);
/* /*
for (int i = 0; i < npts; i++) for (int i = 0; i < npts; i++)
{ {
@ -730,27 +727,27 @@ namespace netgen
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<0,2> (int elnr, int npts, MultiElementTransformation<0,2> (int elnr, int npts,
const __m256d *xi, size_t sxi, const SIMD<double> *xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
cout << "MultiElementtransformation<0,2> simd not implemented" << endl; cout << "MultiElementtransformation<0,2> simd not implemented" << endl;
} }
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<0,1> (int elnr, int npts, MultiElementTransformation<0,1> (int elnr, int npts,
const __m256d * xi, size_t sxi, const SIMD<double> * xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
cout << "multi-eltrafo simd called, 0,1,simd" << endl; cout << "multi-eltrafo simd called, 0,1,simd" << endl;
} }
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<1,3> (int elnr, int npts, MultiElementTransformation<1,3> (int elnr, int npts,
const __m256d * xi, size_t sxi, const SIMD<double> * xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
double hxi[4][1]; double hxi[4][1];
double hx[4][3]; double hx[4][3];
@ -772,9 +769,9 @@ namespace netgen
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<1,2> (int elnr, int npts, MultiElementTransformation<1,2> (int elnr, int npts,
const __m256d * xi, size_t sxi, const SIMD<double> * xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
for (int i = 0; i < npts; i++) for (int i = 0; i < npts; i++)
{ {
@ -801,15 +798,15 @@ namespace netgen
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<2,3> (int elnr, int npts, MultiElementTransformation<2,3> (int elnr, int npts,
const __m256d * xi, size_t sxi, const SIMD<double> * xi, size_t sxi,
__m256d * x, size_t sx, SIMD<double> * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const SIMD<double> * dxdxi, size_t sdxdxi) const
{ {
mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<3> mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<3>
(elnr, npts, (elnr, npts,
reinterpret_cast<const SIMD<double>*> (xi), sxi, xi, sxi,
reinterpret_cast<SIMD<double>*> (x), sx, x, sx,
reinterpret_cast<SIMD<double>*> (dxdxi), sdxdxi); dxdxi, sdxdxi);
/* /*
for (int i = 0; i < npts; i++) for (int i = 0; i < npts; i++)
{ {
@ -834,7 +831,6 @@ namespace netgen
*/ */
} }
#endif

View File

@ -92,17 +92,15 @@ namespace netgen
return res; return res;
} }
#ifdef __AVX__
virtual bool GetMultiSurfValue (size_t selnr, size_t facetnr, size_t npts, virtual bool GetMultiSurfValue (size_t selnr, size_t facetnr, size_t npts,
const __m256d * xref, const SIMD<double> * xref,
const __m256d * x, const SIMD<double> * x,
const __m256d * dxdxref, const SIMD<double> * dxdxref,
__m256d * values) SIMD<double> * values)
{ {
cerr << "GetMultiSurfVaue not overloaded" << endl; cerr << "GetMultiSurfVaue not overloaded for SIMD<double>" << endl;
return false; return false;
} }
#endif
virtual bool GetSegmentValue (int segnr, double xref, double * values) virtual bool GetSegmentValue (int segnr, double xref, double * values)
{ return false; } { return false; }

View File

@ -1279,7 +1279,6 @@ namespace netgen
Array<double> mvalues(npt); Array<double> mvalues(npt);
int sol_comp = (sol && sol->draw_surface) ? sol->components : 0; int sol_comp = (sol && sol->draw_surface) ? sol->components : 0;
#ifdef __AVX__
Array<Point<2,SIMD<double>> > simd_pref ( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() ); Array<Point<2,SIMD<double>> > simd_pref ( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() );
Array<Point<3,SIMD<double>> > simd_points ( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() ); Array<Point<3,SIMD<double>> > simd_points ( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() );
Array<Mat<3,2,SIMD<double>> > simd_dxdxis ( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() ); Array<Mat<3,2,SIMD<double>> > simd_dxdxis ( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() );
@ -1287,7 +1286,6 @@ namespace netgen
Array<SIMD<double>> simd_values( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() * sol_comp); Array<SIMD<double>> simd_values( (npt+SIMD<double>::Size()-1)/SIMD<double>::Size() * sol_comp);
#endif
// Array<Point<3,float>> glob_pnts; // Array<Point<3,float>> glob_pnts;
// Array<Vec<3,float>> glob_nvs; // Array<Vec<3,float>> glob_nvs;
@ -1486,7 +1484,6 @@ namespace netgen
NgProfiler::StartTimer(timerloops); NgProfiler::StartTimer(timerloops);
size_t base_pi = 0; size_t base_pi = 0;
#ifdef __AVX__
for (int iy = 0, ii = 0; iy <= n; iy++) for (int iy = 0, ii = 0; iy <= n; iy++)
for (int ix = 0; ix <= n-iy; ix++, ii++) for (int ix = 0; ix <= n-iy; ix++, ii++)
pref[ii] = Point<2> (ix*invn, iy*invn); pref[ii] = Point<2> (ix*invn, iy*invn);
@ -1496,10 +1493,9 @@ namespace netgen
for (size_t i = 0; i < simd_npt; i++) for (size_t i = 0; i < simd_npt; i++)
{ {
simd_pref[i](0).SIMD_function ([&] (size_t j) { size_t ii = i*simd_size+j; return (ii < npt) ? pref[ii](0) : 0; }, std::true_type()); simd_pref[i](0) = [&] (size_t j) { size_t ii = i*simd_size+j; return (ii < npt) ? pref[ii](0) : 0; };
simd_pref[i](1).SIMD_function ([&] (size_t j) { size_t ii = i*simd_size+j; return (ii < npt) ? pref[ii](1) : 0; }, std::true_type()); simd_pref[i](1) = [&] (size_t j) { size_t ii = i*simd_size+j; return (ii < npt) ? pref[ii](1) : 0; };
} }
#endif
Array<int> ind_reftrig; Array<int> ind_reftrig;
for (int iy = 0, ii = 0; iy < n; iy++,ii++) for (int iy = 0, ii = 0; iy < n; iy++,ii++)