Merge branch 'simdtrafo' into 'master'

Simdtrafo

element - trafo for simd-points is working (2d, 3d, surfaces)

See merge request !4
This commit is contained in:
Matthias Hochsteger 2016-07-13 11:11:30 +02:00
commit 80a2c61336
13 changed files with 1477 additions and 386 deletions

View File

@ -416,9 +416,9 @@ namespace netgen
using Array<T>::data;
using Array<T>::ownmem;
// T mem[S]; // Intel C++ calls dummy constructor
T mem[S]; // Intel C++ calls dummy constructor
// char mem[S*sizeof(T)];
double mem[(S*sizeof(T)+7) / 8];
// double mem[(S*sizeof(T)+7) / 8];
public:
/// Generate array of logical and physical size asize
explicit ArrayMem(int asize = 0)

View File

@ -46,4 +46,7 @@
#include "gzstream.h"
#include "archive_base.hpp"
#include "mysimd.hpp"
#endif

367
libsrc/general/mysimd.hpp Normal file
View File

@ -0,0 +1,367 @@
#ifndef FILE_MYSIMD
#define FILE_MYSIMD
/**************************************************************************/
/* File: mysimd.hpp */
/* Author: Joachim Schoeberl */
/* Date: 25. Mar. 16 */
/**************************************************************************/
#include <immintrin.h>
#ifdef WIN32
inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); }
inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); }
inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); }
inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); }
inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); }
inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; }
inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; }
inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; }
inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; }
inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; }
inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; }
inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); }
inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); }
inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); }
inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); }
inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); }
inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; }
inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; }
inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; }
inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; }
inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; }
inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; }
#endif
namespace netgen
{
template <typename T> class SIMD;
template <typename T>
struct has_call_operator
{
template <typename C> static std::true_type check( decltype( sizeof(&C::operator() )) ) { return std::true_type(); }
template <typename> static std::false_type check(...) { return std::false_type(); }
typedef decltype( check<T>(sizeof(char)) ) type;
static constexpr type value = type();
};
#ifdef __AVX2__
template<>
class alignas(32) SIMD<double>
{
__m256d data;
public:
static constexpr int Size() { return 4; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (double val)
{
data = _mm256_set1_pd(val);
}
SIMD (__m256d adata)
: data(adata)
{ ; }
/*
template <typename T>
SIMD (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
}
*/
/*
template <typename T>
SIMD & operator= (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
return *this;
}
*/
template <typename Function>
void SIMD_function (const Function & func, std::true_type)
{
data = _mm256_set_pd(func(3), func(2), func(1), func(0));
}
// not a function
void SIMD_function (double const * p, std::false_type)
{
data = _mm256_loadu_pd(p);
}
void SIMD_function (double val, std::false_type)
{
data = _mm256_set1_pd(val);
}
void SIMD_function (__m256d _data, std::false_type)
{
data = _data;
}
inline double operator[] (int i) const { return ((double*)(&data))[i]; }
inline __m256d Data() const { return data; }
inline __m256d & Data() { return data; }
};
inline SIMD<double> operator+ (SIMD<double> a, SIMD<double> b) { return a.Data()+b.Data(); }
inline SIMD<double> operator- (SIMD<double> a, SIMD<double> b) { return a.Data()-b.Data(); }
inline SIMD<double> operator- (SIMD<double> a) { return -a.Data(); }
inline SIMD<double> operator* (SIMD<double> a, SIMD<double> b) { return a.Data()*b.Data(); }
inline SIMD<double> operator/ (SIMD<double> a, SIMD<double> b) { return a.Data()/b.Data(); }
inline SIMD<double> operator* (double a, SIMD<double> b) { return SIMD<double>(a)*b; }
inline SIMD<double> operator* (SIMD<double> b, double a) { return SIMD<double>(a)*b; }
inline SIMD<double> operator+= (SIMD<double> & a, SIMD<double> b) { return a.Data()+=b.Data(); }
inline SIMD<double> operator-= (SIMD<double> & a, SIMD<double> b) { return a.Data()-=b.Data(); }
inline SIMD<double> operator*= (SIMD<double> & a, SIMD<double> b) { return a.Data()*=b.Data(); }
inline SIMD<double> operator/= (SIMD<double> & a, SIMD<double> b) { return a.Data()/=b.Data(); }
using std::sqrt;
using std::fabs;
inline SIMD<double> sqrt (SIMD<double> a) { return _mm256_sqrt_pd(a.Data()); }
inline SIMD<double> fabs (SIMD<double> a) { return _mm256_max_pd(a.Data(), -a.Data()); }
inline SIMD<double> L2Norm2 (SIMD<double> a) { return a.Data()*a.Data(); }
inline SIMD<double> Trans (SIMD<double> a) { return a; }
inline SIMD<double> IfPos (SIMD<double> a, SIMD<double> b, SIMD<double> c)
{
auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS);
return _mm256_blendv_pd(c.Data(), b.Data(), cp);
}
inline double HSum (SIMD<double> sd)
{
__m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1));
return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv));
}
#else
template<>
class SIMD<double>
{
double data;
public:
static constexpr int Size() { return 1; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (double val)
: data(val) { ; }
/*
template <typename T>
SIMD (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
}
*/
template <typename T>
SIMD & operator= (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
return *this;
}
template <typename Function>
void SIMD_function (const Function & func, std::true_type)
{
data = func(0);
}
// not a function
void SIMD_function (double const * p, std::false_type)
{
data = *p;
}
void SIMD_function (double val, std::false_type)
{
data = val;
}
double operator[] (int i) const { return ((double*)(&data))[i]; }
double Data() const { return data; }
double & Data() { return data; }
};
inline SIMD<double> operator+ (SIMD<double> a, SIMD<double> b) { return a.Data()+b.Data(); }
inline SIMD<double> operator- (SIMD<double> a, SIMD<double> b) { return a.Data()-b.Data(); }
inline SIMD<double> operator- (SIMD<double> a) { return -a.Data(); }
inline SIMD<double> operator* (SIMD<double> a, SIMD<double> b) { return a.Data()*b.Data(); }
inline SIMD<double> operator/ (SIMD<double> a, SIMD<double> b) { return a.Data()/b.Data(); }
inline SIMD<double> operator* (double a, SIMD<double> b) { return SIMD<double>(a)*b; }
inline SIMD<double> operator* (SIMD<double> b, double a) { return SIMD<double>(a)*b; }
inline SIMD<double> operator+= (SIMD<double> & a, SIMD<double> b) { return a.Data()+=b.Data(); }
inline SIMD<double> operator-= (SIMD<double> & a, SIMD<double> b) { return a.Data()-=b.Data(); }
inline SIMD<double> operator*= (SIMD<double> & a, SIMD<double> b) { return a.Data()*=b.Data(); }
inline SIMD<double> operator/= (SIMD<double> & a, SIMD<double> b) { return a.Data()/=b.Data(); }
using std::sqrt;
using std::fabs;
inline SIMD<double> sqrt (SIMD<double> a) { return std::sqrt(a.Data()); }
inline SIMD<double> fabs (SIMD<double> a) { return std::fabs(a.Data()); }
inline SIMD<double> L2Norm2 (SIMD<double> a) { return a.Data()*a.Data(); }
inline SIMD<double> Trans (SIMD<double> a) { return a; }
inline SIMD<double> IfPos (SIMD<double> a, SIMD<double> b, SIMD<double> c)
{
return (a.Data() > 0) ? b : c;
}
inline double HSum (SIMD<double> sd)
{ return sd.Data(); }
#endif
template <typename T>
ostream & operator<< (ostream & ost, SIMD<T> simd)
{
ost << simd[0];
for (int i = 1; i < simd.Size(); i++)
ost << " " << simd[i];
return ost;
}
/*
using std::exp;
inline netgen::SIMD<double> exp (netgen::SIMD<double> a) {
return netgen::SIMD<double>([&](int i)->double { return exp(a[i]); } );
}
using std::log;
inline netgen::SIMD<double> log (netgen::SIMD<double> a) {
return netgen::SIMD<double>([&](int i)->double { return log(a[i]); } );
}
using std::pow;
inline netgen::SIMD<double> pow (netgen::SIMD<double> a, double x) {
return netgen::SIMD<double>([&](int i)->double { return pow(a[i],x); } );
}
*/
template <int D, typename T>
class MultiSIMD
{
SIMD<T> head;
MultiSIMD<D-1,T> tail;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : head(v), tail(v) { ; }
MultiSIMD (SIMD<T> _head, MultiSIMD<D-1,T> _tail)
: head(_head), tail(_tail) { ; }
SIMD<T> Head() const { return head; }
MultiSIMD<D-1,T> Tail() const { return tail; }
SIMD<T> & Head() { return head; }
MultiSIMD<D-1,T> & Tail() { return tail; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? head : tail.template Get<NR-1>(); }
template <int NR>
SIMD<T> & Get() { return NR==0 ? head : tail.template Get<NR-1>(); }
};
template <typename T>
class MultiSIMD<2,T>
{
SIMD<T> v0, v1;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : v0(v), v1(v) { ; }
MultiSIMD (SIMD<T> _v0, SIMD<T> _v1) : v0(_v0), v1(_v1) { ; }
SIMD<T> Head() const { return v0; }
SIMD<T> Tail() const { return v1; }
SIMD<T> & Head() { return v0; }
SIMD<T> & Tail() { return v1; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? v0 : v1; }
template <int NR>
SIMD<T> & Get() { return NR==0 ? v0 : v1; }
};
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()+b.Head(), a.Tail()+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()-b.Head(), a.Tail()-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a-b.Head(), a-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (b.Head()-a, b.Tail()-a); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a)
{ return MultiSIMD<D,double> (-a.Head(), -a.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()*b.Head(), a.Tail()*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator/ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()/b.Head(), a.Tail()/b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> & operator+= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, double b)
{ a.Head()-=b; a.Tail()-=b; return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, double b)
{ a.Head()*=b; a.Tail()*=b; return a; }
// inline MultiSIMD<double> operator/= (MultiSIMD<double> & a, MultiSIMD<double> b) { return a.Data()/=b.Data(); }
template <int D, typename T>
ostream & operator<< (ostream & ost, MultiSIMD<D,T> multi)
{
ost << multi.Head() << " " << multi.Tail();
return ost;
}
}
#endif

View File

@ -12,31 +12,31 @@ namespace netgen
{
template <int D> class Vec;
template <int D> class Point;
template <int D, typename T = double> class Vec;
template <int D, typename T = double> class Point;
template <int D>
template <int D, typename T>
class Point
{
protected:
double x[D];
T x[D];
public:
Point () { ; }
Point (double ax) { for (int i = 0; i < D; i++) x[i] = ax; }
Point (double ax, double ay)
Point (T ax) { for (int i = 0; i < D; i++) x[i] = ax; }
Point (T ax, T ay)
{
// static_assert(D==2, "Point<D> constructor with 2 args called");
x[0] = ax; x[1] = ay;
}
Point (double ax, double ay, double az)
Point (T ax, T ay, T az)
{
// static_assert(D==3, "Point<D> constructor with 3 args called");
x[0] = ax; x[1] = ay; x[2] = az;
}
Point (double ax, double ay, double az, double au)
Point (T ax, T ay, T az, T au)
{ x[0] = ax; x[1] = ay; x[2] = az; x[3] = au;}
Point (const Point<D> & p2)
@ -52,39 +52,39 @@ namespace netgen
return *this;
}
Point & operator= (double val)
Point & operator= (T val)
{
for (int i = 0; i < D; i++) x[i] = val;
return *this;
}
double & operator() (int i) { return x[i]; }
const double & operator() (int i) const { return x[i]; }
T & operator() (int i) { return x[i]; }
const T & operator() (int i) const { return x[i]; }
operator const double* () const { return x; }
operator const T* () const { return x; }
};
template <int D>
template <int D, typename T>
class Vec
{
protected:
double x[D];
T x[D];
public:
Vec () { ; } // for (int i = 0; i < D; i++) x[i] = 0; }
Vec (double ax) { for (int i = 0; i < D; i++) x[i] = ax; }
Vec (double ax, double ay)
Vec (T ax) { for (int i = 0; i < D; i++) x[i] = ax; }
Vec (T ax, T ay)
{
// static_assert(D==2, "Vec<D> constructor with 2 args called");
x[0] = ax; x[1] = ay;
}
Vec (double ax, double ay, double az)
Vec (T ax, T ay, T az)
{
// static_assert(D==3, "Vec<D> constructor with 3 args called");
x[0] = ax; x[1] = ay; x[2] = az;
}
Vec (double ax, double ay, double az, double au)
Vec (T ax, T ay, T az, T au)
{ x[0] = ax; x[1] = ay; x[2] = az; x[3] = au; }
Vec (const Vec<D> & p2)
@ -104,28 +104,28 @@ namespace netgen
return *this;
}
Vec & operator= (double s)
Vec & operator= (T s)
{
for (int i = 0; i < D; i++) x[i] = s;
return *this;
}
double & operator() (int i) { return x[i]; }
const double & operator() (int i) const { return x[i]; }
T & operator() (int i) { return x[i]; }
const T & operator() (int i) const { return x[i]; }
operator const double* () const { return x; }
operator const T* () const { return x; }
double Length () const
T Length () const
{
double l = 0;
T l = 0;
for (int i = 0; i < D; i++)
l += x[i] * x[i];
return sqrt (l);
}
double Length2 () const
T Length2 () const
{
double l = 0;
T l = 0;
for (int i = 0; i < D; i++)
l += x[i] * x[i];
return l;
@ -133,7 +133,7 @@ namespace netgen
const Vec<D> & Normalize ()
{
double l = Length();
T l = Length();
if (l != 0)
for (int i = 0; i < D; i++)
x[i] /= l;
@ -147,19 +147,19 @@ namespace netgen
template <int H, int W=H>
template <int H, int W=H, typename T = double>
class Mat
{
protected:
double x[H*W];
T x[H*W];
public:
Mat () { ; }
Mat (const Mat & b)
{ for (int i = 0; i < H*W; i++) x[i] = b.x[i]; }
Mat & operator= (double s)
Mat & operator= (T s)
{
for (int i = 0; i < H*W; i++) x[i] = s;
return *this;
@ -171,30 +171,30 @@ namespace netgen
return *this;
}
double & operator() (int i, int j) { return x[i*W+j]; }
const double & operator() (int i, int j) const { return x[i*W+j]; }
double & operator() (int i) { return x[i]; }
const double & operator() (int i) const { return x[i]; }
T & operator() (int i, int j) { return x[i*W+j]; }
const T & operator() (int i, int j) const { return x[i*W+j]; }
T & operator() (int i) { return x[i]; }
const T & operator() (int i) const { return x[i]; }
Vec<H> Col (int i) const
Vec<H,T> Col (int i) const
{
Vec<H> hv;
Vec<H,T> hv;
for (int j = 0; j < H; j++)
hv(j) = x[j*W+i];
return hv;
}
Vec<W> Row (int i) const
Vec<W,T> Row (int i) const
{
Vec<W> hv;
Vec<W,T> hv;
for (int j = 0; j < W; j++)
hv(j) = x[i*W+j];
return hv;
}
void Solve (const Vec<H> & rhs, Vec<W> & sol) const
void Solve (const Vec<H,T> & rhs, Vec<W,T> & sol) const
{
Mat<W,H> inv;
Mat<W,H,T> inv;
CalcInverse (*this, inv);
sol = inv * rhs;
}

View File

@ -247,13 +247,14 @@ namespace netgen
}
*/
inline Mat<2,2> operator* (const Mat<2,2> & a, const Mat<2,2> & b)
template <typename T>
inline Mat<2,2,T> operator* (const Mat<2,2,T> & a, const Mat<2,2,T> & b)
{
Mat<2,2> m;
Mat<2,2,T> m;
for (int i = 0; i < 2; i++)
for (int j = 0; j < 2; j++)
{
double sum = 0;
T sum(0);
for (int k = 0; k < 2; k++)
sum += a(i,k) * b(k, j);
m(i,j) = sum;
@ -275,14 +276,14 @@ namespace netgen
return m;
}
inline Mat<3,2> operator* (const Mat<3,2> & a, const Mat<2,2> & b)
template <typename T>
inline Mat<3,2,T> operator* (const Mat<3,2,T> & a, const Mat<2,2,T> & b)
{
Mat<3,2> m;
Mat<3,2,T> m;
for (int i = 0; i < 3; i++)
for (int j = 0; j < 2; j++)
{
double sum = 0;
T sum(0.0);
for (int k = 0; k < 2; k++)
sum += a(i,k) * b(k, j);
m(i,j) = sum;
@ -306,14 +307,14 @@ namespace netgen
return m;
}
inline Mat<3,3> operator* (const Mat<3,3> & a, const Mat<3,3> & b)
template <typename T>
inline Mat<3,3,T> operator* (const Mat<3,3,T> & a, const Mat<3,3,T> & b)
{
Mat<3,3> m;
Mat<3,3,T> m;
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
{
double sum = 0;
T sum = T(0);
for (int k = 0; k < 3; k++)
sum += a(i,k) * b(k, j);
m(i,j) = sum;
@ -348,8 +349,8 @@ namespace netgen
template <int D>
inline ostream & operator<< (ostream & ost, const Vec<D> & a)
template <int D, typename T>
inline ostream & operator<< (ostream & ost, const Vec<D,T> & a)
{
ost << "(";
for (int i = 0; i < D-1; i++)
@ -358,8 +359,8 @@ namespace netgen
return ost;
}
template <int D>
inline ostream & operator<< (ostream & ost, const Point<D> & a)
template <int D, typename T>
inline ostream & operator<< (ostream & ost, const Point<D,T> & a)
{
ost << "(";
for (int i = 0; i < D-1; i++)
@ -375,8 +376,8 @@ namespace netgen
return ost;
}
template <int H, int W>
inline ostream & operator<< (ostream & ost, const Mat<H,W> & m)
template <int H, int W, typename T>
inline ostream & operator<< (ostream & ost, const Mat<H,W,T> & m)
{
ost << "(";
for (int i = 0; i < H; i++)

View File

@ -618,6 +618,12 @@ namespace netgen
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<2>
(elnr, npts,
reinterpret_cast<const SIMD<double>*> (xi), sxi,
reinterpret_cast<SIMD<double>*> (x), sx,
reinterpret_cast<SIMD<double>*> (dxdxi), sdxdxi);
/*
for (int i = 0; i < npts; i++)
{
double hxi[4][2];
@ -638,6 +644,7 @@ namespace netgen
x += sx;
dxdxi += sdxdxi;
}
*/
}
template<> DLL_HEADER void Ngx_Mesh ::
@ -646,6 +653,12 @@ namespace netgen
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
mesh->GetCurvedElements().CalcMultiPointElementTransformation
(elnr, npts,
reinterpret_cast<const SIMD<double>*> (xi), sxi,
reinterpret_cast<SIMD<double>*> (x), sx,
reinterpret_cast<SIMD<double>*> (dxdxi), sdxdxi);
/*
for (int i = 0; i < npts; i++)
{
double hxi[4][3];
@ -666,6 +679,7 @@ namespace netgen
x += sx;
dxdxi += sdxdxi;
}
*/
}
template<> DLL_HEADER void Ngx_Mesh ::
@ -712,6 +726,12 @@ namespace netgen
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<3>
(elnr, npts,
reinterpret_cast<const SIMD<double>*> (xi), sxi,
reinterpret_cast<SIMD<double>*> (x), sx,
reinterpret_cast<SIMD<double>*> (dxdxi), sdxdxi);
/*
for (int i = 0; i < npts; i++)
{
double hxi[4][2];
@ -732,6 +752,7 @@ namespace netgen
x += sx;
dxdxi += sdxdxi;
}
*/
}
#endif

View File

@ -152,9 +152,120 @@ extern ostream & operator<< (ostream & ost, const DenseMatrix & m);
template <int WIDTH>
template <int WIDTH, typename T = double>
class MatrixFixWidth
{
protected:
int height;
T * __restrict data;
bool ownmem;
public:
///
MatrixFixWidth ()
{ height = 0; data = 0; ownmem = false; }
///
MatrixFixWidth (int h)
{ height = h; data = new T[WIDTH*height]; ownmem = true; }
///
MatrixFixWidth (int h, T * adata)
{ height = h; data = adata; ownmem = false; }
///
MatrixFixWidth (const MatrixFixWidth & m2)
: height(m2.height), data(m2.data), ownmem(false)
{ ; }
///
~MatrixFixWidth ()
{ if (ownmem) delete [] data; }
void SetSize (int h)
{
if (h != height)
{
if (ownmem) delete data;
height = h;
data = new T[WIDTH*height];
ownmem = true;
}
}
///
int Height() const { return height; }
///
int Width() const { return WIDTH; }
MatrixFixWidth & operator= (const MatrixFixWidth & m2)
{
for (int i = 0; i < height*WIDTH; i++)
data[i] = m2.data[i];
}
///
MatrixFixWidth & operator= (T v)
{
for (int i = 0; i < height*WIDTH; i++)
data[i] = v;
return *this;
}
/*
///
void Mult (const FlatVector & v, FlatVector & prod) const
{
T sum;
const T * mp, * sp;
T * dp;
mp = data;
dp = &prod[0];
for (int i = 0; i < height; i++)
{
sum = 0;
sp = &v[0];
for (int j = 0; j < WIDTH; j++)
{
sum += *mp * *sp;
mp++;
sp++;
}
*dp = sum;
dp++;
}
}
*/
T & operator() (int i, int j)
{ return data[i*WIDTH+j]; }
const T & operator() (int i, int j) const
{ return data[i*WIDTH+j]; }
MatrixFixWidth & operator*= (T v)
{
if (data)
for (int i = 0; i < height*WIDTH; i++)
data[i] *= v;
return *this;
}
const T & Get(int i, int j) const { return data[(i-1)*WIDTH+j-1]; }
///
const T & Get(int i) const { return data[i-1]; }
///
void Set(int i, int j, T v) { data[(i-1)*WIDTH+j-1] = v; }
///
T & Elem(int i, int j) { return data[(i-1)*WIDTH+j-1]; }
///
const T & ConstElem(int i, int j) const { return data[(i-1)*WIDTH+j-1]; }
};
template <int WIDTH>
class MatrixFixWidth<WIDTH,double>
{
protected:
int height;
double * data;
@ -261,6 +372,9 @@ public:
};
template <int WIDTH>
extern ostream & operator<< (ostream & ost, const MatrixFixWidth<WIDTH> & m)
{

View File

@ -8,6 +8,50 @@
/* *************************************************************************/
template <typename T>
class TFlatVector
{
protected:
int s;
T * data;
public:
TFlatVector () { ; }
TFlatVector (int as, T * adata)
{ s = as; data = adata; }
int Size () const
{ return s; }
TFlatVector & operator= (const TFlatVector & v)
{ memcpy (data, v.data, s*sizeof(T)); return *this; }
TFlatVector & operator= (T scal)
{
for (int i = 0; i < s; i++) data[i] = scal;
return *this;
}
T & operator[] (int i) { return data[i]; }
const T & operator[] (int i) const { return data[i]; }
T & operator() (int i) { return data[i]; }
const T & operator() (int i) const { return data[i]; }
// double & Elem (int i) { return data[i-1]; }
// const double & Get (int i) const { return data[i-1]; }
// void Set (int i, double val) { data[i-1] = val; }
TFlatVector & operator*= (T scal)
{
for (int i = 0; i < s; i++) data[i] *= scal;
return *this;
}
};
class FlatVector
{
protected:
@ -75,11 +119,13 @@ public:
return sqrt (sum);
}
operator TFlatVector<double> () const { return TFlatVector<double> (s, data); }
friend double operator* (const FlatVector & v1, const FlatVector & v2);
};
class Vector : public FlatVector
{
bool ownmem;
@ -113,6 +159,7 @@ public:
}
}
operator TFlatVector<double> () const { return TFlatVector<double> (s, data); }
};
template <int S>

File diff suppressed because it is too large Load Diff

View File

@ -126,25 +126,25 @@ public:
double * x, size_t sx,
double * dxdxi, size_t sdxdxi);
void CalcMultiPointSurfaceTransformation (Array< Point<2> > * xi, SurfaceElementIndex elnr,
Array< Point<3> > * x,
Array< Mat<3,2> > * dxdxi);
template <int DIM_SPACE>
template <int DIM_SPACE, typename T>
void CalcMultiPointSurfaceTransformation (SurfaceElementIndex elnr, int n,
const double * xi, size_t sxi,
double * x, size_t sx,
double * dxdxi, size_t sdxdxi);
const T * xi, size_t sxi,
T * x, size_t sx,
T * dxdxi, size_t sdxdxi);
void CalcMultiPointElementTransformation (Array< Point<3> > * xi, ElementIndex elnr,
Array< Point<3> > * x,
Array< Mat<3,3> > * dxdxi);
template <typename T>
void CalcMultiPointElementTransformation (ElementIndex elnr, int n,
const double * xi, size_t sxi,
double * x, size_t sx,
double * dxdxi, size_t sdxdxi);
const T * xi, size_t sxi,
T * x, size_t sx,
T * dxdxi, size_t sdxdxi);
@ -196,11 +196,14 @@ private:
Vec<3> hcoefs[10]; // enough for second order tets
};
void CalcElementShapes (ElementInfo & info, const Point<3> & xi, Vector & shapes) const;
template <typename T>
void CalcElementShapes (ElementInfo & info, Point<3,T> xi, TFlatVector<T> shapes) const;
void GetCoefficients (ElementInfo & info, Vec<3> * coefs) const;
void CalcElementDShapes (ElementInfo & info, const Point<3> & xi, MatrixFixWidth<3> & dshapes) const;
template <typename T>
void CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const;
template <typename T>
bool EvaluateMapping (ElementInfo & info, const Point<3,T> xi, Point<3,T> & x, Mat<3,3,T> & jac) const;
class SurfaceElementInfo
{
@ -213,10 +216,15 @@ private:
int facenr;
};
void CalcElementShapes (SurfaceElementInfo & elinfo, const Point<2> & xi, Vector & shapes) const;
template <typename T>
void CalcElementShapes (SurfaceElementInfo & elinfo, const Point<2,T> xi, TFlatVector<T> shapes) const;
template <int DIM_SPACE>
void GetCoefficients (SurfaceElementInfo & elinfo, Array<Vec<DIM_SPACE> > & coefs) const;
void CalcElementDShapes (SurfaceElementInfo & elinfo, const Point<2> & xi, MatrixFixWidth<2> & dshapes) const;
template <typename T>
void CalcElementDShapes (SurfaceElementInfo & elinfo, const Point<2,T> xi, MatrixFixWidth<2,T> dshapes) const;
template <int DIM_SPACE, typename T>
bool EvaluateMapping (SurfaceElementInfo & info, const Point<2,T> xi, Point<DIM_SPACE,T> & x, Mat<DIM_SPACE,2,T> & jac) const;
};

View File

@ -541,6 +541,30 @@ namespace netgen
}
}
template <typename T>
void Element2d :: GetShapeNew (const Point<2,T> & p, TFlatVector<T> shape) const
{
switch (typ)
{
case TRIG:
{
shape(0) = p(0);
shape(1) = p(1);
shape(2) = 1-p(0)-p(1);
break;
}
case QUAD:
{
shape(0) = (1-p(0))*(1-p(1));
shape(1) = p(0) *(1-p(1));
shape(2) = p(0) * p(1) ;
shape(3) = (1-p(0))* p(1) ;
break;
}
}
}
@ -587,15 +611,15 @@ namespace netgen
}
template <typename T>
void Element2d ::
GetDShapeNew (const Point<2> & p, MatrixFixWidth<2> & dshape) const
GetDShapeNew (const Point<2,T> & p, MatrixFixWidth<2,T> & dshape) const
{
switch (typ)
{
case TRIG:
{
dshape = 0;
dshape = T(0.0);
dshape(0,0) = 1;
dshape(1,1) = 1;
dshape(2,0) = -1;
@ -1850,8 +1874,8 @@ namespace netgen
}
void Element :: GetShapeNew (const Point<3> & p, FlatVector & shape) const
template <typename T>
void Element :: GetShapeNew (const Point<3,T> & p, TFlatVector<T> shape) const
{
/*
if (shape.Size() < GetNP())
@ -1874,10 +1898,10 @@ namespace netgen
case TET10:
{
double lam1 = p(0);
double lam2 = p(1);
double lam3 = p(2);
double lam4 = 1-p(0)-p(1)-p(2);
T lam1 = p(0);
T lam2 = p(1);
T lam3 = p(2);
T lam4 = 1-p(0)-p(1)-p(2);
shape(0) = 2 * lam1 * (lam1-0.5);
shape(1) = 2 * lam2 * (lam2-0.5);
@ -1897,11 +1921,12 @@ namespace netgen
case PYRAMID:
{
double noz = 1-p(2);
if (noz == 0.0) noz = 1e-10;
T noz = 1-p(2);
// if (noz == 0.0) noz = 1e-10;
noz += T(1e-12);
double xi = p(0) / noz;
double eta = p(1) / noz;
T xi = p(0) / noz;
T eta = p(1) / noz;
shape(0) = (1-xi)*(1-eta) * (noz);
shape(1) = ( xi)*(1-eta) * (noz);
shape(2) = ( xi)*( eta) * (noz);
@ -1965,15 +1990,15 @@ namespace netgen
}
}
template <typename T>
void Element ::
GetDShapeNew (const Point<3> & p, MatrixFixWidth<3> & dshape) const
GetDShapeNew (const Point<3,T> & p, MatrixFixWidth<3,T> & dshape) const
{
switch (typ)
{
case TET:
{
dshape = 0;
dshape = T(0.0);
dshape(0,0) = 1;
dshape(1,1) = 1;
dshape(2,2) = 1;
@ -1984,7 +2009,7 @@ namespace netgen
}
case PRISM:
{
dshape = 0;
dshape = T(0.0);
dshape(0,0) = 1-p(2);
dshape(0,2) = -p(0);
dshape(1,1) = 1-p(2);
@ -2007,23 +2032,40 @@ namespace netgen
{
int np = GetNP();
double eps = 1e-6;
Vector shaper(np), shapel(np);
ArrayMem<T,100> mem(2*np);
TFlatVector<T> shaper(np, &mem[0]);
TFlatVector<T> shapel(np, &mem[np]);
// Vector shaper(np), shapel(np);
for (int i = 1; i <= 3; i++)
for (int i = 0; i < 3; i++)
{
Point3d pr(p), pl(p);
pr.X(i) += eps;
pl.X(i) -= eps;
Point<3,T> pr(p), pl(p);
pr(i) += eps;
pl(i) -= eps;
GetShapeNew (pr, shaper);
GetShapeNew (pl, shapel);
for (int j = 0; j < np; j++)
dshape(j, i-1) = (shaper(j) - shapel(j)) / (2 * eps);
dshape(j, i) = (shaper(j) - shapel(j)) / (2 * eps);
}
}
}
}
template void Element2d :: GetShapeNew (const Point<2,double> & p, TFlatVector<double> shape) const;
template void Element2d :: GetShapeNew (const Point<2,SIMD<double>> & p, TFlatVector<SIMD<double>> shape) const;
template void Element2d::GetDShapeNew<double> (const Point<2> &, MatrixFixWidth<2> &) const;
template void Element2d::GetDShapeNew<SIMD<double>> (const Point<2,SIMD<double>> &, MatrixFixWidth<2,SIMD<double>> &) const;
template void Element :: GetShapeNew (const Point<3,double> & p, TFlatVector<double> shape) const;
template void Element :: GetShapeNew (const Point<3,SIMD<double>> & p, TFlatVector<SIMD<double>> shape) const;
template void Element::GetDShapeNew<double> (const Point<3> &, MatrixFixWidth<3> &) const;
template void Element::GetDShapeNew<SIMD<double>> (const Point<3,SIMD<double>> &, MatrixFixWidth<3,SIMD<double>> &) const;
void Element ::
GetPointMatrix (const T_POINTS & points,
DenseMatrix & pmat) const

View File

@ -125,7 +125,6 @@ namespace netgen
PointIndex operator-- (int) { PointIndex hi(*this); i--; return hi; }
PointIndex operator++ () { i++; return *this; }
PointIndex operator-- () { i--; return *this; }
#ifdef BASE0
enum { BASE = 0 };
#else
@ -448,9 +447,13 @@ namespace netgen
void GetShape (const Point2d & p, class Vector & shape) const;
void GetShapeNew (const Point<2> & p, class FlatVector & shape) const;
template <typename T>
void GetShapeNew (const Point<2,T> & p, TFlatVector<T> shape) const;
/// matrix 2 * np
void GetDShape (const Point2d & p, class DenseMatrix & dshape) const;
void GetDShapeNew (const Point<2> & p, class MatrixFixWidth<2> & dshape) const;
template <typename T>
void GetDShapeNew (const Point<2,T> & p, class MatrixFixWidth<2,T> & dshape) const;
/// matrix 2 * np
void GetPointMatrix (const Array<Point2d> & points,
class DenseMatrix & pmat) const;
@ -717,10 +720,13 @@ namespace netgen
class DenseMatrix & trans) const;
void GetShape (const Point<3> & p, class Vector & shape) const;
void GetShapeNew (const Point<3> & p, class FlatVector & shape) const;
// void GetShapeNew (const Point<3> & p, class FlatVector & shape) const;
template <typename T>
void GetShapeNew (const Point<3,T> & p, TFlatVector<T> shape) const;
/// matrix 2 * np
void GetDShape (const Point<3> & p, class DenseMatrix & dshape) const;
void GetDShapeNew (const Point<3> & p, class MatrixFixWidth<3> & dshape) const;
template <typename T>
void GetDShapeNew (const Point<3,T> & p, class MatrixFixWidth<3,T> & dshape) const;
/// matrix 3 * np
void GetPointMatrix (const T_POINTS & points,
class DenseMatrix & pmat) const;

View File

@ -1846,7 +1846,7 @@ namespace netgen
for (int i = 0; i < cnt_valid; i++)
{
el.GetShapeNew (locgrid[i], shape);
el.GetShapeNew<double> (locgrid[i], shape);
Point<3> pglob;
for (int j = 0; j < 3; j++)
{
@ -3913,7 +3913,7 @@ namespace netgen
for (int i = 0; i < cnt_valid; i++)
{
el.GetShapeNew (locgrid[i], shape);
el.GetShapeNew<double> (locgrid[i], shape);
Point<3> pglob;
for (int j = 0; j < 3; j++)
{