using simds for element-trafo, added type-template to many classes

This commit is contained in:
Joachim Schöberl 2016-07-06 17:12:57 +01:00
parent 1a6ace7138
commit 8414cb2d60
12 changed files with 854 additions and 232 deletions

View File

@ -416,9 +416,9 @@ namespace netgen
using Array<T>::data; using Array<T>::data;
using Array<T>::ownmem; using Array<T>::ownmem;
// T mem[S]; // Intel C++ calls dummy constructor T mem[S]; // Intel C++ calls dummy constructor
// char mem[S*sizeof(T)]; // char mem[S*sizeof(T)];
double mem[(S*sizeof(T)+7) / 8]; // double mem[(S*sizeof(T)+7) / 8];
public: public:
/// Generate array of logical and physical size asize /// Generate array of logical and physical size asize
explicit ArrayMem(int asize = 0) explicit ArrayMem(int asize = 0)

View File

@ -46,4 +46,7 @@
#include "gzstream.h" #include "gzstream.h"
#include "archive_base.hpp" #include "archive_base.hpp"
#include "mysimd.hpp"
#endif #endif

358
libsrc/general/mysimd.hpp Normal file
View File

@ -0,0 +1,358 @@
#ifndef FILE_MYSIMD
#define FILE_MYSIMD
/**************************************************************************/
/* File: mysimd.hpp */
/* Author: Joachim Schoeberl */
/* Date: 25. Mar. 16 */
/**************************************************************************/
#include <immintrin.h>
#ifdef WIN32
inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); }
inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); }
inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); }
inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); }
inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); }
inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; }
inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; }
inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; }
inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; }
inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; }
inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; }
inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); }
inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); }
inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); }
inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); }
inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); }
inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; }
inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; }
inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; }
inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; }
inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; }
inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; }
#endif
namespace netgen
{
template <typename T> class SIMD;
template <typename T>
struct has_call_operator
{
template <typename C> static std::true_type check( decltype( sizeof(&C::operator() )) ) { return std::true_type(); }
template <typename> static std::false_type check(...) { return std::false_type(); }
typedef decltype( check<T>(sizeof(char)) ) type;
static constexpr type value = type();
};
#ifdef __AVX2__
template<>
class alignas(32) SIMD<double>
{
__m256d data;
public:
static constexpr int Size() { return 4; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
SIMD (double val)
{
data = _mm256_set1_pd(val);
}
SIMD (__m256d adata)
: data(adata)
{ ; }
/*
template <typename T>
SIMD (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
}
*/
/*
template <typename T>
SIMD & operator= (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
return *this;
}
*/
template <typename Function>
void SIMD_function (const Function & func, std::true_type)
{
data = _mm256_set_pd(func(3), func(2), func(1), func(0));
}
// not a function
void SIMD_function (double const * p, std::false_type)
{
data = _mm256_loadu_pd(p);
}
void SIMD_function (double val, std::false_type)
{
data = _mm256_set1_pd(val);
}
void SIMD_function (__m256d _data, std::false_type)
{
data = _data;
}
inline double operator[] (int i) const { return ((double*)(&data))[i]; }
inline __m256d Data() const { return data; }
inline __m256d & Data() { return data; }
};
inline SIMD<double> operator+ (SIMD<double> a, SIMD<double> b) { return a.Data()+b.Data(); }
inline SIMD<double> operator- (SIMD<double> a, SIMD<double> b) { return a.Data()-b.Data(); }
inline SIMD<double> operator- (SIMD<double> a) { return -a.Data(); }
inline SIMD<double> operator* (SIMD<double> a, SIMD<double> b) { return a.Data()*b.Data(); }
inline SIMD<double> operator/ (SIMD<double> a, SIMD<double> b) { return a.Data()/b.Data(); }
inline SIMD<double> operator* (double a, SIMD<double> b) { return SIMD<double>(a)*b; }
inline SIMD<double> operator* (SIMD<double> b, double a) { return SIMD<double>(a)*b; }
inline SIMD<double> operator+= (SIMD<double> & a, SIMD<double> b) { return a.Data()+=b.Data(); }
inline SIMD<double> operator-= (SIMD<double> & a, SIMD<double> b) { return a.Data()-=b.Data(); }
inline SIMD<double> operator*= (SIMD<double> & a, SIMD<double> b) { return a.Data()*=b.Data(); }
inline SIMD<double> operator/= (SIMD<double> & a, SIMD<double> b) { return a.Data()/=b.Data(); }
using std::sqrt;
using std::fabs;
inline SIMD<double> sqrt (SIMD<double> a) { return _mm256_sqrt_pd(a.Data()); }
inline SIMD<double> fabs (SIMD<double> a) { return _mm256_max_pd(a.Data(), -a.Data()); }
inline SIMD<double> L2Norm2 (SIMD<double> a) { return a.Data()*a.Data(); }
inline SIMD<double> Trans (SIMD<double> a) { return a; }
inline SIMD<double> IfPos (SIMD<double> a, SIMD<double> b, SIMD<double> c)
{
auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS);
return _mm256_blendv_pd(c.Data(), b.Data(), cp);
}
inline double HSum (SIMD<double> sd)
{
__m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1));
return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv));
}
#else
template<>
class SIMD<double>
{
double data;
public:
static constexpr int Size() { return 1; }
SIMD () = default;
SIMD (const SIMD &) = default;
SIMD & operator= (const SIMD &) = default;
template <typename T>
SIMD (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
}
template <typename T>
SIMD & operator= (const T & val)
{
// SIMD_function(val, std::is_convertible<T, std::function<double(int)>>());
SIMD_function(val, has_call_operator<T>::value);
return *this;
}
template <typename Function>
void SIMD_function (const Function & func, std::true_type)
{
data = func(0);
}
// not a function
void SIMD_function (double const * p, std::false_type)
{
data = *p;
}
void SIMD_function (double val, std::false_type)
{
data = val;
}
double operator[] (int i) const { return ((double*)(&data))[i]; }
double Data() const { return data; }
double & Data() { return data; }
};
inline SIMD<double> operator+ (SIMD<double> a, SIMD<double> b) { return a.Data()+b.Data(); }
inline SIMD<double> operator- (SIMD<double> a, SIMD<double> b) { return a.Data()-b.Data(); }
inline SIMD<double> operator- (SIMD<double> a) { return -a.Data(); }
inline SIMD<double> operator* (SIMD<double> a, SIMD<double> b) { return a.Data()*b.Data(); }
inline SIMD<double> operator/ (SIMD<double> a, SIMD<double> b) { return a.Data()/b.Data(); }
inline SIMD<double> operator* (double a, SIMD<double> b) { return SIMD<double>(a)*b; }
inline SIMD<double> operator* (SIMD<double> b, double a) { return SIMD<double>(a)*b; }
inline SIMD<double> operator+= (SIMD<double> & a, SIMD<double> b) { return a.Data()+=b.Data(); }
inline SIMD<double> operator-= (SIMD<double> & a, SIMD<double> b) { return a.Data()-=b.Data(); }
inline SIMD<double> operator*= (SIMD<double> & a, SIMD<double> b) { return a.Data()*=b.Data(); }
inline SIMD<double> operator/= (SIMD<double> & a, SIMD<double> b) { return a.Data()/=b.Data(); }
inline SIMD<double> sqrt (SIMD<double> a) { return std::sqrt(a.Data()); }
inline SIMD<double> fabs (SIMD<double> a) { return std::fabs(a.Data()); }
inline SIMD<double> L2Norm2 (SIMD<double> a) { return a.Data()*a.Data(); }
inline SIMD<double> Trans (SIMD<double> a) { return a; }
inline SIMD<double> IfPos (SIMD<double> a, SIMD<double> b, SIMD<double> c)
{
return (a.Data() > 0) ? b : c;
}
inline double HSum (SIMD<double> sd)
{ return sd.Data(); }
#endif
template <typename T>
ostream & operator<< (ostream & ost, SIMD<T> simd)
{
ost << simd[0];
for (int i = 1; i < simd.Size(); i++)
ost << " " << simd[i];
return ost;
}
/*
using std::exp;
inline netgen::SIMD<double> exp (netgen::SIMD<double> a) {
return netgen::SIMD<double>([&](int i)->double { return exp(a[i]); } );
}
using std::log;
inline netgen::SIMD<double> log (netgen::SIMD<double> a) {
return netgen::SIMD<double>([&](int i)->double { return log(a[i]); } );
}
using std::pow;
inline netgen::SIMD<double> pow (netgen::SIMD<double> a, double x) {
return netgen::SIMD<double>([&](int i)->double { return pow(a[i],x); } );
}
*/
template <int D, typename T>
class MultiSIMD
{
SIMD<T> head;
MultiSIMD<D-1,T> tail;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : head(v), tail(v) { ; }
MultiSIMD (SIMD<T> _head, MultiSIMD<D-1,T> _tail)
: head(_head), tail(_tail) { ; }
SIMD<T> Head() const { return head; }
MultiSIMD<D-1,T> Tail() const { return tail; }
SIMD<T> & Head() { return head; }
MultiSIMD<D-1,T> & Tail() { return tail; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? head : tail.template Get<NR-1>(); }
template <int NR>
SIMD<T> & Get() { return NR==0 ? head : tail.template Get<NR-1>(); }
};
template <typename T>
class MultiSIMD<2,T>
{
SIMD<T> v0, v1;
public:
MultiSIMD () = default;
MultiSIMD (const MultiSIMD & ) = default;
MultiSIMD (T v) : v0(v), v1(v) { ; }
MultiSIMD (SIMD<T> _v0, SIMD<T> _v1) : v0(_v0), v1(_v1) { ; }
SIMD<T> Head() const { return v0; }
SIMD<T> Tail() const { return v1; }
SIMD<T> & Head() { return v0; }
SIMD<T> & Tail() { return v1; }
template <int NR>
SIMD<T> Get() const { return NR==0 ? v0 : v1; }
template <int NR>
SIMD<T> & Get() { return NR==0 ? v0 : v1; }
};
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()+b.Head(), a.Tail()+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator+ (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (a+b.Head(), a+b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()-b.Head(), a.Tail()-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a-b.Head(), a-b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> (b.Head()-a, b.Tail()-a); }
template <int D> inline MultiSIMD<D,double> operator- (MultiSIMD<D,double> a)
{ return MultiSIMD<D,double> (-a.Head(), -a.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()*b.Head(), a.Tail()*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator/ (MultiSIMD<D,double> a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> (a.Head()/b.Head(), a.Tail()/b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (double a, MultiSIMD<D,double> b)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> operator* (MultiSIMD<D,double> b, double a)
{ return MultiSIMD<D,double> ( a*b.Head(), a*b.Tail()); }
template <int D> inline MultiSIMD<D,double> & operator+= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, double b)
{ a.Head()-=b; a.Tail()-=b; return a; }
template <int D> inline MultiSIMD<D,double> operator-= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, MultiSIMD<D,double> b)
{ a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; }
template <int D> inline MultiSIMD<D,double> & operator*= (MultiSIMD<D,double> & a, double b)
{ a.Head()*=b; a.Tail()*=b; return a; }
// inline MultiSIMD<double> operator/= (MultiSIMD<double> & a, MultiSIMD<double> b) { return a.Data()/=b.Data(); }
template <int D, typename T>
ostream & operator<< (ostream & ost, MultiSIMD<D,T> multi)
{
ost << multi.Head() << " " << multi.Tail();
return ost;
}
}
#endif

View File

@ -12,31 +12,31 @@ namespace netgen
{ {
template <int D> class Vec; template <int D, typename T = double> class Vec;
template <int D> class Point; template <int D, typename T = double> class Point;
template <int D> template <int D, typename T>
class Point class Point
{ {
protected: protected:
double x[D]; T x[D];
public: public:
Point () { ; } Point () { ; }
Point (double ax) { for (int i = 0; i < D; i++) x[i] = ax; } Point (T ax) { for (int i = 0; i < D; i++) x[i] = ax; }
Point (double ax, double ay) Point (T ax, T ay)
{ {
// static_assert(D==2, "Point<D> constructor with 2 args called"); // static_assert(D==2, "Point<D> constructor with 2 args called");
x[0] = ax; x[1] = ay; x[0] = ax; x[1] = ay;
} }
Point (double ax, double ay, double az) Point (T ax, T ay, T az)
{ {
// static_assert(D==3, "Point<D> constructor with 3 args called"); // static_assert(D==3, "Point<D> constructor with 3 args called");
x[0] = ax; x[1] = ay; x[2] = az; x[0] = ax; x[1] = ay; x[2] = az;
} }
Point (double ax, double ay, double az, double au) Point (T ax, T ay, T az, T au)
{ x[0] = ax; x[1] = ay; x[2] = az; x[3] = au;} { x[0] = ax; x[1] = ay; x[2] = az; x[3] = au;}
Point (const Point<D> & p2) Point (const Point<D> & p2)
@ -52,39 +52,39 @@ namespace netgen
return *this; return *this;
} }
Point & operator= (double val) Point & operator= (T val)
{ {
for (int i = 0; i < D; i++) x[i] = val; for (int i = 0; i < D; i++) x[i] = val;
return *this; return *this;
} }
double & operator() (int i) { return x[i]; } T & operator() (int i) { return x[i]; }
const double & operator() (int i) const { return x[i]; } const T & operator() (int i) const { return x[i]; }
operator const double* () const { return x; } operator const T* () const { return x; }
}; };
template <int D> template <int D, typename T>
class Vec class Vec
{ {
protected: protected:
double x[D]; T x[D];
public: public:
Vec () { ; } // for (int i = 0; i < D; i++) x[i] = 0; } Vec () { ; } // for (int i = 0; i < D; i++) x[i] = 0; }
Vec (double ax) { for (int i = 0; i < D; i++) x[i] = ax; } Vec (T ax) { for (int i = 0; i < D; i++) x[i] = ax; }
Vec (double ax, double ay) Vec (T ax, T ay)
{ {
// static_assert(D==2, "Vec<D> constructor with 2 args called"); // static_assert(D==2, "Vec<D> constructor with 2 args called");
x[0] = ax; x[1] = ay; x[0] = ax; x[1] = ay;
} }
Vec (double ax, double ay, double az) Vec (T ax, T ay, T az)
{ {
// static_assert(D==3, "Vec<D> constructor with 3 args called"); // static_assert(D==3, "Vec<D> constructor with 3 args called");
x[0] = ax; x[1] = ay; x[2] = az; x[0] = ax; x[1] = ay; x[2] = az;
} }
Vec (double ax, double ay, double az, double au) Vec (T ax, T ay, T az, T au)
{ x[0] = ax; x[1] = ay; x[2] = az; x[3] = au; } { x[0] = ax; x[1] = ay; x[2] = az; x[3] = au; }
Vec (const Vec<D> & p2) Vec (const Vec<D> & p2)
@ -104,28 +104,28 @@ namespace netgen
return *this; return *this;
} }
Vec & operator= (double s) Vec & operator= (T s)
{ {
for (int i = 0; i < D; i++) x[i] = s; for (int i = 0; i < D; i++) x[i] = s;
return *this; return *this;
} }
double & operator() (int i) { return x[i]; } T & operator() (int i) { return x[i]; }
const double & operator() (int i) const { return x[i]; } const T & operator() (int i) const { return x[i]; }
operator const double* () const { return x; } operator const T* () const { return x; }
double Length () const T Length () const
{ {
double l = 0; T l = 0;
for (int i = 0; i < D; i++) for (int i = 0; i < D; i++)
l += x[i] * x[i]; l += x[i] * x[i];
return sqrt (l); return sqrt (l);
} }
double Length2 () const T Length2 () const
{ {
double l = 0; T l = 0;
for (int i = 0; i < D; i++) for (int i = 0; i < D; i++)
l += x[i] * x[i]; l += x[i] * x[i];
return l; return l;
@ -133,7 +133,7 @@ namespace netgen
const Vec<D> & Normalize () const Vec<D> & Normalize ()
{ {
double l = Length(); T l = Length();
if (l != 0) if (l != 0)
for (int i = 0; i < D; i++) for (int i = 0; i < D; i++)
x[i] /= l; x[i] /= l;
@ -147,19 +147,19 @@ namespace netgen
template <int H, int W=H> template <int H, int W=H, typename T = double>
class Mat class Mat
{ {
protected: protected:
double x[H*W]; T x[H*W];
public: public:
Mat () { ; } Mat () { ; }
Mat (const Mat & b) Mat (const Mat & b)
{ for (int i = 0; i < H*W; i++) x[i] = b.x[i]; } { for (int i = 0; i < H*W; i++) x[i] = b.x[i]; }
Mat & operator= (double s) Mat & operator= (T s)
{ {
for (int i = 0; i < H*W; i++) x[i] = s; for (int i = 0; i < H*W; i++) x[i] = s;
return *this; return *this;
@ -171,30 +171,30 @@ namespace netgen
return *this; return *this;
} }
double & operator() (int i, int j) { return x[i*W+j]; } T & operator() (int i, int j) { return x[i*W+j]; }
const double & operator() (int i, int j) const { return x[i*W+j]; } const T & operator() (int i, int j) const { return x[i*W+j]; }
double & operator() (int i) { return x[i]; } T & operator() (int i) { return x[i]; }
const double & operator() (int i) const { return x[i]; } const T & operator() (int i) const { return x[i]; }
Vec<H> Col (int i) const Vec<H,T> Col (int i) const
{ {
Vec<H> hv; Vec<H,T> hv;
for (int j = 0; j < H; j++) for (int j = 0; j < H; j++)
hv(j) = x[j*W+i]; hv(j) = x[j*W+i];
return hv; return hv;
} }
Vec<W> Row (int i) const Vec<W,T> Row (int i) const
{ {
Vec<W> hv; Vec<W,T> hv;
for (int j = 0; j < W; j++) for (int j = 0; j < W; j++)
hv(j) = x[i*W+j]; hv(j) = x[i*W+j];
return hv; return hv;
} }
void Solve (const Vec<H> & rhs, Vec<W> & sol) const void Solve (const Vec<H,T> & rhs, Vec<W,T> & sol) const
{ {
Mat<W,H> inv; Mat<W,H,T> inv;
CalcInverse (*this, inv); CalcInverse (*this, inv);
sol = inv * rhs; sol = inv * rhs;
} }

View File

@ -306,14 +306,14 @@ namespace netgen
return m; return m;
} }
template <typename T>
inline Mat<3,3> operator* (const Mat<3,3> & a, const Mat<3,3> & b) inline Mat<3,3,T> operator* (const Mat<3,3,T> & a, const Mat<3,3,T> & b)
{ {
Mat<3,3> m; Mat<3,3,T> m;
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
{ {
double sum = 0; T sum = T(0);
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
sum += a(i,k) * b(k, j); sum += a(i,k) * b(k, j);
m(i,j) = sum; m(i,j) = sum;

View File

@ -645,6 +645,13 @@ namespace netgen
__m256d * x, size_t sx, __m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const __m256d * dxdxi, size_t sdxdxi) const
{ {
mesh->GetCurvedElements().CalcMultiPointElementTransformation
(elnr, npts,
reinterpret_cast<const SIMD<double>*> (xi), sxi,
reinterpret_cast<SIMD<double>*> (x), sx,
reinterpret_cast<SIMD<double>*> (dxdxi), sdxdxi);
/*
for (int i = 0; i < npts; i++) for (int i = 0; i < npts; i++)
{ {
double hxi[4][3]; double hxi[4][3];
@ -665,6 +672,7 @@ namespace netgen
x += sx; x += sx;
dxdxi += sdxdxi; dxdxi += sdxdxi;
} }
*/
} }
template<> DLL_HEADER void Ngx_Mesh :: template<> DLL_HEADER void Ngx_Mesh ::

View File

@ -152,9 +152,120 @@ extern ostream & operator<< (ostream & ost, const DenseMatrix & m);
template <int WIDTH> template <int WIDTH, typename T = double>
class MatrixFixWidth class MatrixFixWidth
{ {
protected:
int height;
T * __restrict data;
bool ownmem;
public:
///
MatrixFixWidth ()
{ height = 0; data = 0; ownmem = false; }
///
MatrixFixWidth (int h)
{ height = h; data = new T[WIDTH*height]; ownmem = true; }
///
MatrixFixWidth (int h, T * adata)
{ height = h; data = adata; ownmem = false; }
///
MatrixFixWidth (const MatrixFixWidth & m2)
: height(m2.height), data(m2.data), ownmem(false)
{ ; }
///
~MatrixFixWidth ()
{ if (ownmem) delete [] data; }
void SetSize (int h)
{
if (h != height)
{
if (ownmem) delete data;
height = h;
data = new T[WIDTH*height];
ownmem = true;
}
}
///
int Height() const { return height; }
///
int Width() const { return WIDTH; }
MatrixFixWidth & operator= (const MatrixFixWidth & m2)
{
for (int i = 0; i < height*WIDTH; i++)
data[i] = m2.data[i];
}
///
MatrixFixWidth & operator= (T v)
{
for (int i = 0; i < height*WIDTH; i++)
data[i] = v;
return *this;
}
/*
///
void Mult (const FlatVector & v, FlatVector & prod) const
{
T sum;
const T * mp, * sp;
T * dp;
mp = data;
dp = &prod[0];
for (int i = 0; i < height; i++)
{
sum = 0;
sp = &v[0];
for (int j = 0; j < WIDTH; j++)
{
sum += *mp * *sp;
mp++;
sp++;
}
*dp = sum;
dp++;
}
}
*/
T & operator() (int i, int j)
{ return data[i*WIDTH+j]; }
const T & operator() (int i, int j) const
{ return data[i*WIDTH+j]; }
MatrixFixWidth & operator*= (T v)
{
if (data)
for (int i = 0; i < height*WIDTH; i++)
data[i] *= v;
return *this;
}
const T & Get(int i, int j) const { return data[(i-1)*WIDTH+j-1]; }
///
const T & Get(int i) const { return data[i-1]; }
///
void Set(int i, int j, T v) { data[(i-1)*WIDTH+j-1] = v; }
///
T & Elem(int i, int j) { return data[(i-1)*WIDTH+j-1]; }
///
const T & ConstElem(int i, int j) const { return data[(i-1)*WIDTH+j-1]; }
};
template <int WIDTH>
class MatrixFixWidth<WIDTH,double>
{
protected: protected:
int height; int height;
double * data; double * data;
@ -261,6 +372,9 @@ public:
}; };
template <int WIDTH> template <int WIDTH>
extern ostream & operator<< (ostream & ost, const MatrixFixWidth<WIDTH> & m) extern ostream & operator<< (ostream & ost, const MatrixFixWidth<WIDTH> & m)
{ {

View File

@ -79,6 +79,47 @@ public:
}; };
template <typename T>
class TFlatVector
{
protected:
int s;
T * data;
public:
TFlatVector () { ; }
TFlatVector (int as, T * adata)
{ s = as; data = adata; }
int Size () const
{ return s; }
TFlatVector & operator= (const TFlatVector & v)
{ memcpy (data, v.data, s*sizeof(T)); return *this; }
TFlatVector & operator= (T scal)
{
for (int i = 0; i < s; i++) data[i] = scal;
return *this;
}
T & operator[] (int i) { return data[i]; }
const T & operator[] (int i) const { return data[i]; }
T & operator() (int i) { return data[i]; }
const T & operator() (int i) const { return data[i]; }
// double & Elem (int i) { return data[i-1]; }
// const double & Get (int i) const { return data[i-1]; }
// void Set (int i, double val) { data[i-1] = val; }
TFlatVector & operator*= (T scal)
{
for (int i = 0; i < s; i++) data[i] *= scal;
return *this;
}
};
class Vector : public FlatVector class Vector : public FlatVector
{ {

View File

@ -48,9 +48,10 @@ namespace netgen
// compute edge bubbles up to order n, x \in (-1, 1) // compute edge bubbles up to order n, x \in (-1, 1)
static void CalcEdgeShape (int n, double x, double * shape) template <typename T>
static void CalcEdgeShape (int n, T x, T * shape)
{ {
double p1 = x, p2 = -1, p3 = 0; T p1 = x, p2 = -1, p3 = 0;
for (int j=2; j<=n; j++) for (int j=2; j<=n; j++)
{ {
p3=p2; p2=p1; p3=p2; p2=p1;
@ -76,10 +77,11 @@ namespace netgen
} }
} }
static void CalcEdgeShapeDx (int n, double x, double * shape, double * dshape) template <typename T>
static void CalcEdgeShapeDx (int n, T x, T * shape, T * dshape)
{ {
double p1 = x, p2 = -1, p3 = 0; T p1 = x, p2 = -1, p3 = 0;
double p1dx = 1, p2dx = 0, p3dx = 0; T p1dx = 1, p2dx = 0, p3dx = 0;
for (int j=2; j<=n; j++) for (int j=2; j<=n; j++)
{ {
@ -95,7 +97,8 @@ namespace netgen
} }
// compute L_i(x/t) * t^i // compute L_i(x/t) * t^i
static void CalcScaledEdgeShape (int n, double x, double t, double * shape) template <typename T>
static void CalcScaledEdgeShape (int n, T x, T t, T * shape)
{ {
static bool init = false; static bool init = false;
static double coefs[100][2]; static double coefs[100][2];
@ -109,8 +112,8 @@ namespace netgen
init = true; init = true;
} }
double p1 = x, p2 = -1, p3 = 0; T p1 = x, p2 = -1, p3 = 0;
double tt = t*t; T tt = t*t;
for (int j=0; j<=n-2; j++) for (int j=0; j<=n-2; j++)
{ {
p3=p2; p2=p1; p3=p2; p2=p1;
@ -120,13 +123,13 @@ namespace netgen
} }
} }
template <int DIST> template <int DIST, typename T>
static void CalcScaledEdgeShapeDxDt (int n, double x, double t, double * dshape) static void CalcScaledEdgeShapeDxDt (int n, T x, T t, T * dshape)
{ {
double p1 = x, p2 = -1, p3 = 0; T p1 = x, p2 = -1, p3 = 0;
double p1dx = 1, p1dt = 0; T p1dx = 1, p1dt = 0;
double p2dx = 0, p2dt = 0; T p2dx = 0, p2dt = 0;
double p3dx = 0, p3dt = 0; T p3dx = 0, p3dt = 0;
for (int j=0; j<=n-2; j++) for (int j=0; j<=n-2; j++)
{ {
@ -225,7 +228,7 @@ namespace netgen
template <class S, class T> template <class S, class T>
void Evaluate (int n, S x, T * values) void Evaluate (int n, S x, T * values)
{ {
S p1 = 1.0, p2 = 0.0, p3; S p1(1.0), p2(0.0), p3;
if (n >= 0) if (n >= 0)
p2 = values[0] = 1.0; p2 = values[0] = 1.0;
@ -243,7 +246,7 @@ namespace netgen
template <class S, class T> template <class S, class T>
void EvaluateScaled (int n, S x, S y, T * values) void EvaluateScaled (int n, S x, S y, T * values)
{ {
S p1 = 1.0, p2 = 0.0, p3; S p1(1.0), p2(0.0), p3;
if (n >= 0) if (n >= 0)
p2 = values[0] = 1.0; p2 = values[0] = 1.0;
@ -314,7 +317,7 @@ namespace netgen
if (n >= 0) values[0] = 1.0; if (n >= 0) values[0] = 1.0;
*/ */
S p1 = 1.0, p2 = 0.0, p3; S p1(1.0), p2(0.0), p3;
if (n >= 0) if (n >= 0)
p2 = values[0] = 1.0; p2 = values[0] = 1.0;
@ -378,14 +381,14 @@ namespace netgen
shape[ii++] = hx[ix]*hy[iy+50*ix]; shape[ii++] = hx[ix]*hy[iy+50*ix];
} }
template <typename T>
static void CalcTrigShapeDxDy (int n, double x, double y, double * dshape) static void CalcTrigShapeDxDy (int n, T x, T y, T * dshape)
{ {
if (n < 3) return; if (n < 3) return;
AutoDiff<2> adx(x, 0); AutoDiff<2,T> adx(x, 0);
AutoDiff<2> ady(y, 1); AutoDiff<2,T> ady(y, 1);
AutoDiff<2> res[2000]; AutoDiff<2,T> res[2000];
CalcTrigShape (n, adx, ady, &res[0]); CalcTrigShape (n, adx, ady, &res[0]);
int ndof = (n-1)*(n-2)/2; int ndof = (n-1)*(n-2)/2;
for (int i = 0; i < ndof; i++) for (int i = 0; i < ndof; i++)
@ -421,13 +424,14 @@ namespace netgen
// compute face bubbles up to order n, 0 < y, y-x < 1, x+y < 1 // compute face bubbles up to order n, 0 < y, y-x < 1, x+y < 1
static void CalcScaledTrigShapeDxDyDt (int n, double x, double y, double t, double * dshape) template <typename T>
static void CalcScaledTrigShapeDxDyDt (int n, T x, T y, T t, T * dshape)
{ {
if (n < 3) return; if (n < 3) return;
AutoDiff<3> adx(x, 0); AutoDiff<3,T> adx(x, 0);
AutoDiff<3> ady(y, 1); AutoDiff<3,T> ady(y, 1);
AutoDiff<3> adt(t, 2); AutoDiff<3,T> adt(t, 2);
AutoDiff<3> res[2000]; AutoDiff<3,T> res[2000];
CalcScaledTrigShape (n, adx, ady, adt, &res[0]); CalcScaledTrigShape (n, adx, ady, adt, &res[0]);
int ndof = (n-1)*(n-2)/2; int ndof = (n-1)*(n-2)/2;
for (int i = 0; i < ndof; i++) for (int i = 0; i < ndof; i++)
@ -1390,7 +1394,6 @@ namespace netgen
void CurvedElements :: void CurvedElements ::
CalcElementShapes (SegmentInfo & info, double xi, Vector & shapes) const CalcElementShapes (SegmentInfo & info, double xi, Vector & shapes) const
{ {
@ -2281,9 +2284,6 @@ namespace netgen
} }
Vector shapes;
MatrixFixWidth<3> dshapes;
const Element & el = mesh[elnr]; const Element & el = mesh[elnr];
ELEMENT_TYPE type = el.GetType(); ELEMENT_TYPE type = el.GetType();
@ -2315,6 +2315,11 @@ namespace netgen
} }
} }
ArrayMem<double,100> mem(info.ndof);
TFlatVector<double> shapes(info.ndof, &mem[0]);
ArrayMem<double,100> dshapes_mem(info.ndof*3);
MatrixFixWidth<3> dshapes(info.ndof, &dshapes_mem[0]);
CalcElementShapes (info, xi, shapes); CalcElementShapes (info, xi, shapes);
Vec<3> * coefs = (info.ndof <= 10) ? Vec<3> * coefs = (info.ndof <= 10) ?
@ -2359,16 +2364,16 @@ namespace netgen
template <typename T>
void CurvedElements :: CalcElementShapes (ElementInfo & info, const Point<3> & xi, Vector & shapes) const void CurvedElements :: CalcElementShapes (ElementInfo & info, Point<3,T> xi, TFlatVector<T> shapes) const
{ {
const Element & el = mesh[info.elnr]; const Element & el = mesh[info.elnr];
if (rational && info.order >= 2) if (rational && info.order >= 2)
{ {
shapes.SetSize(10); // shapes.SetSize(10);
double w = 1; T w = 1;
double lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; T lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) };
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
shapes(j) = lami[j] * lami[j]; shapes(j) = lami[j] * lami[j];
@ -2384,7 +2389,7 @@ namespace netgen
return; return;
} }
shapes.SetSize(info.ndof); // shapes.SetSize(info.ndof);
switch (el.GetType()) switch (el.GetType())
{ {
@ -2434,10 +2439,10 @@ namespace netgen
case TET10: case TET10:
{ {
double x = xi(0); T x = xi(0);
double y = xi(1); T y = xi(1);
double z = xi(2); T z = xi(2);
double lam4 = 1 - x - y - z; T lam4 = 1 - x - y - z;
/* /*
shapes(0) = xi(0); shapes(0) = xi(0);
shapes(1) = xi(1); shapes(1) = xi(1);
@ -2462,8 +2467,8 @@ namespace netgen
case PRISM: case PRISM:
{ {
double lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; T lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) };
double lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; T lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) };
for (int i = 0; i < 6; i++) for (int i = 0; i < 6; i++)
shapes(i) = lami[i] * lamiz[i]; shapes(i) = lami[i] * lamiz[i];
for (int i = 6; i < info.ndof; i++) for (int i = 6; i < info.ndof; i++)
@ -2483,7 +2488,7 @@ namespace netgen
if (el[vi1] > el[vi2]) swap (vi1, vi2); if (el[vi1] > el[vi2]) swap (vi1, vi2);
CalcScaledEdgeShape (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapes(ii)); CalcScaledEdgeShape (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapes(ii));
double facz = (i < 3) ? (1-xi(2)) : xi(2); T facz = (i < 3) ? (1-xi(2)) : xi(2);
for (int j = 0; j < eorder-1; j++) for (int j = 0; j < eorder-1; j++)
shapes(ii+j) *= facz; shapes(ii+j) *= facz;
@ -2499,9 +2504,9 @@ namespace netgen
int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1; int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1;
if (el[vi1] > el[vi2]) swap (vi1, vi2); if (el[vi1] > el[vi2]) swap (vi1, vi2);
double bubz = lamiz[vi1]*lamiz[vi2]; T bubz = lamiz[vi1]*lamiz[vi2];
double polyz = lamiz[vi1] - lamiz[vi2]; T polyz = lamiz[vi1] - lamiz[vi2];
double bubxy = lami[vi1]; T bubxy = lami[vi1];
for (int j = 0; j < eorder-1; j++) for (int j = 0; j < eorder-1; j++)
{ {
@ -2538,11 +2543,12 @@ namespace netgen
case PYRAMID: case PYRAMID:
{ {
shapes = 0.0; shapes = 0.0;
double x = xi(0); T x = xi(0);
double y = xi(1); T y = xi(1);
double z = xi(2); T z = xi(2);
if (z == 1.) z = 1-1e-10; // if (z == 1.) z = 1-1e-10;
z *= (1-1e-12);
shapes[0] = (1-z-x)*(1-z-y) / (1-z); shapes[0] = (1-z-x)*(1-z-y) / (1-z);
shapes[1] = x*(1-z-y) / (1-z); shapes[1] = x*(1-z-y) / (1-z);
shapes[2] = x*y / (1-z); shapes[2] = x*y / (1-z);
@ -2551,7 +2557,7 @@ namespace netgen
if (info.order == 1) return; if (info.order == 1) return;
double sigma[4] = T sigma[4] =
{ {
sigma[0] = ( (1-z-x) + (1-z-y) ), sigma[0] = ( (1-z-x) + (1-z-y) ),
sigma[1] = ( x + (1-z-y) ), sigma[1] = ( x + (1-z-y) ),
@ -2570,7 +2576,7 @@ namespace netgen
if (el[vi1] > el[vi2]) swap (vi1, vi2); if (el[vi1] > el[vi2]) swap (vi1, vi2);
CalcScaledEdgeShape (eorder, sigma[vi1]-sigma[vi2], 1-z, &shapes(ii)); CalcScaledEdgeShape (eorder, sigma[vi1]-sigma[vi2], 1-z, &shapes(ii));
double fac = (shapes[vi1]+shapes[vi2]) / (1-z); T fac = (shapes[vi1]+shapes[vi2]) / (1-z);
for (int j = 0; j < eorder-1; j++) for (int j = 0; j < eorder-1; j++)
shapes(ii+j) *= fac; shapes(ii+j) *= fac;
@ -2586,9 +2592,9 @@ namespace netgen
case HEX: case HEX:
{ {
shapes = 0.0; shapes = 0.0;
double x = xi(0); T x = xi(0);
double y = xi(1); T y = xi(1);
double z = xi(2); T z = xi(2);
shapes[0] = (1-x)*(1-y)*(1-z); shapes[0] = (1-x)*(1-y)*(1-z);
shapes[1] = x *(1-y)*(1-z); shapes[1] = x *(1-y)*(1-z);
@ -2601,7 +2607,7 @@ namespace netgen
if (info.order == 1) return; if (info.order == 1) return;
double mu[8] = { T mu[8] = {
(1-x)+(1-y)+(1-z), (1-x)+(1-y)+(1-z),
x +(1-y)+(1-z), x +(1-y)+(1-z),
x + y +(1-z), x + y +(1-z),
@ -2624,7 +2630,7 @@ namespace netgen
if (el[vi1] > el[vi2]) swap (vi1, vi2); if (el[vi1] > el[vi2]) swap (vi1, vi2);
CalcEdgeShape (eorder, mu[vi1]-mu[vi2], &shapes(ii)); CalcEdgeShape (eorder, mu[vi1]-mu[vi2], &shapes(ii));
double lame = shapes(vi1)+shapes(vi2); T lame = shapes(vi1)+shapes(vi2);
for (int j = 0; j < order-1; j++) for (int j = 0; j < order-1; j++)
shapes(ii+j) *= lame; shapes(ii+j) *= lame;
ii += eorder-1; ii += eorder-1;
@ -2642,24 +2648,27 @@ namespace netgen
} }
template <typename T>
void CurvedElements :: void CurvedElements ::
CalcElementDShapes (ElementInfo & info, const Point<3> & xi, MatrixFixWidth<3> & dshapes) const CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const
{ {
// static int timer = NgProfiler::CreateTimer ("calcelementdshapes");
const Element & el = mesh[info.elnr]; const Element & el = mesh[info.elnr];
dshapes.SetSize(info.ndof); // dshapes.SetSize(info.ndof);
dshapes = 0.0; if ( (long int)(&dshapes(0,0)) % alignof(T) != 0)
throw NgException ("alignment problem");
if (dshapes.Height() != info.ndof)
throw NgException ("wrong height");
if (rational && info.order >= 2) if (rational && info.order >= 2)
{ {
double w = 1; T w = 1;
double dw[3] = { 0, 0, 0 }; T dw[3] = { 0, 0, 0 };
double lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; T lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) };
double dlami[4][3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { -1, -1, -1 }}; T dlami[4][3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { -1, -1, -1 }};
double shapes[10]; T shapes[10];
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
{ {
@ -2672,7 +2681,7 @@ namespace netgen
const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET); const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET);
for (int j = 0; j < 6; j++) for (int j = 0; j < 6; j++)
{ {
double wi = edgeweight[info.edgenrs[j]]; T wi = edgeweight[info.edgenrs[j]];
shapes[j+4] = 2 * wi * lami[edges[j][0]-1] * lami[edges[j][1]-1]; shapes[j+4] = 2 * wi * lami[edges[j][0]-1] * lami[edges[j][1]-1];
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
@ -2696,6 +2705,8 @@ namespace netgen
{ {
case TET: case TET:
{ {
dshapes = T(0.0);
dshapes(0,0) = 1; dshapes(0,0) = 1;
dshapes(1,1) = 1; dshapes(1,1) = 1;
dshapes(2,2) = 1; dshapes(2,2) = 1;
@ -2705,7 +2716,7 @@ namespace netgen
if (info.order == 1) return; if (info.order == 1) return;
double lami[] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; T lami[] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) };
int ii = 4; int ii = 4;
const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET); const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET);
for (int i = 0; i < 6; i++) for (int i = 0; i < 6; i++)
@ -2718,7 +2729,7 @@ namespace netgen
CalcScaledEdgeShapeDxDt<3> (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0)); CalcScaledEdgeShapeDxDt<3> (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0));
Mat<2,3> trans; Mat<2,3,T> trans;
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
{ {
trans(0,j) = dshapes(vi1,j)-dshapes(vi2,j); trans(0,j) = dshapes(vi1,j)-dshapes(vi2,j);
@ -2727,8 +2738,8 @@ namespace netgen
for (int j = 0; j < order-1; j++) for (int j = 0; j < order-1; j++)
{ {
double ddx = dshapes(ii+j,0); T ddx = dshapes(ii+j,0);
double ddt = dshapes(ii+j,1); T ddt = dshapes(ii+j,1);
dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0);
dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1);
dshapes(ii+j,2) = ddx * trans(0,2) + ddt * trans(1,2); dshapes(ii+j,2) = ddx * trans(0,2) + ddt * trans(1,2);
@ -2754,7 +2765,7 @@ namespace netgen
lami[fnums[2]], lami[fnums[0]]+lami[fnums[1]]+lami[fnums[2]], lami[fnums[2]], lami[fnums[0]]+lami[fnums[1]]+lami[fnums[2]],
&dshapes(ii,0)); &dshapes(ii,0));
Mat<3,3> trans; Mat<3,3,T> trans;
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
{ {
trans(0,j) = dshapes(fnums[1],j)-dshapes(fnums[0],j); trans(0,j) = dshapes(fnums[1],j)-dshapes(fnums[0],j);
@ -2765,9 +2776,9 @@ namespace netgen
int nfd = (forder-1)*(forder-2)/2; int nfd = (forder-1)*(forder-2)/2;
for (int j = 0; j < nfd; j++) for (int j = 0; j < nfd; j++)
{ {
double ddx = dshapes(ii+j,0); T ddx = dshapes(ii+j,0);
double ddy = dshapes(ii+j,1); T ddy = dshapes(ii+j,1);
double ddt = dshapes(ii+j,2); T ddt = dshapes(ii+j,2);
dshapes(ii+j,0) = ddx * trans(0,0) + ddy * trans(1,0) + ddt * trans(2,0); dshapes(ii+j,0) = ddx * trans(0,0) + ddy * trans(1,0) + ddt * trans(2,0);
dshapes(ii+j,1) = ddx * trans(0,1) + ddy * trans(1,1) + ddt * trans(2,1); dshapes(ii+j,1) = ddx * trans(0,1) + ddy * trans(1,1) + ddt * trans(2,1);
dshapes(ii+j,2) = ddx * trans(0,2) + ddy * trans(1,2) + ddt * trans(2,2); dshapes(ii+j,2) = ddx * trans(0,2) + ddy * trans(1,2) + ddt * trans(2,2);
@ -2784,7 +2795,7 @@ namespace netgen
{ {
if (dshapes.Height() == 4) if (dshapes.Height() == 4)
{ {
dshapes = 0.0; dshapes = T(0.0);
dshapes(0,0) = 1; dshapes(0,0) = 1;
dshapes(1,1) = 1; dshapes(1,1) = 1;
@ -2795,11 +2806,11 @@ namespace netgen
} }
else else
{ {
AutoDiff<3> x(xi(0), 0); AutoDiff<3,T> x(xi(0), 0);
AutoDiff<3> y(xi(1), 1); AutoDiff<3,T> y(xi(1), 1);
AutoDiff<3> z(xi(2), 2); AutoDiff<3,T> z(xi(2), 2);
AutoDiff<3> lam4 = 1-x-y-z; AutoDiff<3,T> lam4 = 1-x-y-z;
AutoDiff<3> shapes[10]; AutoDiff<3,T> shapes[10];
shapes[0] = 2 * x * x - x; shapes[0] = 2 * x * x - x;
shapes[1] = 2 * y * y - y; shapes[1] = 2 * y * y - y;
@ -2829,10 +2840,10 @@ namespace netgen
case PRISM: case PRISM:
{ {
double lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; T lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) };
double lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; T lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) };
double dlamiz[6] = { -1, -1, -1, 1, 1, 1 }; T dlamiz[6] = { -1, -1, -1, 1, 1, 1 };
double dlami[6][2] = T dlami[6][2] =
{ { 1, 0, }, { { 1, 0, },
{ 0, 1, }, { 0, 1, },
{ -1, -1 }, { -1, -1 },
@ -2863,11 +2874,12 @@ namespace netgen
vi1 = vi1 % 3; vi1 = vi1 % 3;
vi2 = vi2 % 3; vi2 = vi2 % 3;
Vector shapei(order+1); ArrayMem<T,20> shapei_mem(order+1);
TFlatVector<T> shapei(order+1, &shapei_mem[0]);
CalcScaledEdgeShapeDxDt<3> (order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0) ); CalcScaledEdgeShapeDxDt<3> (order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0) );
CalcScaledEdgeShape(order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapei(0) ); CalcScaledEdgeShape(order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapei(0) );
Mat<2,2> trans; Mat<2,2,T> trans;
for (int j = 0; j < 2; j++) for (int j = 0; j < 2; j++)
{ {
trans(0,j) = dlami[vi1][j]-dlami[vi2][j]; trans(0,j) = dlami[vi1][j]-dlami[vi2][j];
@ -2876,16 +2888,16 @@ namespace netgen
for (int j = 0; j < order-1; j++) for (int j = 0; j < order-1; j++)
{ {
double ddx = dshapes(ii+j,0); T ddx = dshapes(ii+j,0);
double ddt = dshapes(ii+j,1); T ddt = dshapes(ii+j,1);
dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0);
dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1);
} }
double facz = (i < 3) ? (1-xi(2)) : xi(2); T facz = (i < 3) ? (1-xi(2)) : xi(2);
double dfacz = (i < 3) ? (-1) : 1; T dfacz = (i < 3) ? (-1) : 1;
for (int j = 0; j < order-1; j++) for (int j = 0; j < order-1; j++)
{ {
dshapes(ii+j,0) *= facz; dshapes(ii+j,0) *= facz;
@ -2905,13 +2917,13 @@ namespace netgen
int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1); int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1);
if (el[vi1] > el[vi2]) swap (vi1, vi2); if (el[vi1] > el[vi2]) swap (vi1, vi2);
double bubz = lamiz[vi1] * lamiz[vi2]; T bubz = lamiz[vi1] * lamiz[vi2];
double dbubz = dlamiz[vi1]*lamiz[vi2] + lamiz[vi1]*dlamiz[vi2]; T dbubz = dlamiz[vi1]*lamiz[vi2] + lamiz[vi1]*dlamiz[vi2];
double polyz = lamiz[vi1] - lamiz[vi2]; T polyz = lamiz[vi1] - lamiz[vi2];
double dpolyz = dlamiz[vi1] - dlamiz[vi2]; T dpolyz = dlamiz[vi1] - dlamiz[vi2];
double bubxy = lami[(vi1)%3]; T bubxy = lami[(vi1)%3];
double dbubxydx = dlami[(vi1)%3][0]; T dbubxydx = dlami[(vi1)%3][0];
double dbubxydy = dlami[(vi1)%3][1]; T dbubxydy = dlami[(vi1)%3][1];
for (int j = 0; j < eorder-1; j++) for (int j = 0; j < eorder-1; j++)
{ {
@ -2942,8 +2954,10 @@ namespace netgen
if(el[fav[1]] > el[fav[2]]) swap(fav[1],fav[2]); if(el[fav[1]] > el[fav[2]]) swap(fav[1],fav[2]);
if(el[fav[0]] > el[fav[1]]) swap(fav[0],fav[1]); if(el[fav[0]] > el[fav[1]]) swap(fav[0],fav[1]);
MatrixFixWidth<2> dshapei(ndf); ArrayMem<T,2*100> dshapei_mem(ndf);
Vector shapei(ndf); ArrayMem<T,100> shapei_mem(ndf);
MatrixFixWidth<2,T> dshapei(ndf, &dshapei_mem[0]);
TFlatVector<T> shapei(ndf, &shapei_mem[0]);
CalcTrigShapeDxDy (forder, CalcTrigShapeDxDy (forder,
lami[fav[2]]-lami[fav[1]], lami[fav[0]], lami[fav[2]]-lami[fav[1]], lami[fav[0]],
@ -2951,7 +2965,7 @@ namespace netgen
CalcTrigShape (forder, lami[fav[2]]-lami[fav[1]], lami[fav[0]], CalcTrigShape (forder, lami[fav[2]]-lami[fav[1]], lami[fav[0]],
&shapei(0)); &shapei(0));
Mat<2,2> trans; Mat<2,2,T> trans;
for (int j = 0; j < 2; j++) for (int j = 0; j < 2; j++)
{ {
trans(0,j) = dlami[fav[2]][j]-dlami[fav[1]][j]; trans(0,j) = dlami[fav[2]][j]-dlami[fav[1]][j];
@ -2962,8 +2976,8 @@ namespace netgen
{ {
// double ddx = dshapes(ii+j,0); // double ddx = dshapes(ii+j,0);
// double ddt = dshapes(ii+j,1); // double ddt = dshapes(ii+j,1);
double ddx = dshapei(j,0); T ddx = dshapei(j,0);
double ddt = dshapei(j,1); T ddt = dshapei(j,1);
dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0);
dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1);
} }
@ -2983,14 +2997,15 @@ namespace netgen
case PYRAMID: case PYRAMID:
{ {
dshapes = 0.0; dshapes = T(0.0);
double x = xi(0); T x = xi(0);
double y = xi(1); T y = xi(1);
double z = xi(2); T z = xi(2);
if (z == 1.) z = 1-1e-10; // if (z == 1.) z = 1-1e-10;
double z1 = 1-z; z *= 1-1e-12;
double z2 = z1*z1; T z1 = 1-z;
T z2 = z1*z1;
dshapes(0,0) = -(z1-y)/z1; dshapes(0,0) = -(z1-y)/z1;
dshapes(0,1) = -(z1-x)/z1; dshapes(0,1) = -(z1-x)/z1;
@ -3016,29 +3031,30 @@ namespace netgen
int ii = 5; int ii = 5;
const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (PYRAMID); const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (PYRAMID);
if (z == 1.) z = 1-1e-10; // if (z == 1.) z = 1-1e-10;
double shapes[5]; z *= 1-1e-12;
T shapes[5];
shapes[0] = (1-z-x)*(1-z-y) / (1-z); shapes[0] = (1-z-x)*(1-z-y) / (1-z);
shapes[1] = x*(1-z-y) / (1-z); shapes[1] = x*(1-z-y) / (1-z);
shapes[2] = x*y / (1-z); shapes[2] = x*y / (1-z);
shapes[3] = (1-z-x)*y / (1-z); shapes[3] = (1-z-x)*y / (1-z);
shapes[4] = z; shapes[4] = z;
double sigma[4] = T sigma[4] =
{ {
( (1-z-x) + (1-z-y) ), ( (1-z-x) + (1-z-y) ),
( x + (1-z-y) ), ( x + (1-z-y) ),
( x + y ), ( x + y ),
( (1-z-x) + y ), ( (1-z-x) + y ),
}; };
double dsigma[4][3] = T dsigma[4][3] =
{ {
{ -1, -1, -2 }, { -1, -1, -2 },
{ 1, -1, -1 }, { 1, -1, -1 },
{ 1, 1, 0 }, { 1, 1, 0 },
{ -1, 1, -1 } { -1, 1, -1 }
}; };
double dz[3] = { 0, 0, 1 }; T dz[3] = { 0, 0, 1 };
for (int i = 0; i < 4; i++) // horizontal edges for (int i = 0; i < 4; i++) // horizontal edges
{ {
int eorder = edgeorder[info.edgenrs[i]]; int eorder = edgeorder[info.edgenrs[i]];
@ -3047,11 +3063,12 @@ namespace netgen
int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1); int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1);
if (el[vi1] > el[vi2]) swap (vi1, vi2); if (el[vi1] > el[vi2]) swap (vi1, vi2);
Vector shapei(eorder+1); ArrayMem<T,20> shapei_mem(eorder+1);
TFlatVector<T> shapei(eorder+1,&shapei_mem[0]);
CalcScaledEdgeShapeDxDt<3> (eorder, sigma[vi1]-sigma[vi2], 1-z, &dshapes(ii,0) ); CalcScaledEdgeShapeDxDt<3> (eorder, sigma[vi1]-sigma[vi2], 1-z, &dshapes(ii,0) );
CalcScaledEdgeShape(eorder, sigma[vi1]-sigma[vi2], 1-z, &shapei(0) ); CalcScaledEdgeShape(eorder, sigma[vi1]-sigma[vi2], 1-z, &shapei(0) );
double fac = (shapes[vi1]+shapes[vi2]) / (1-z); T fac = (shapes[vi1]+shapes[vi2]) / (1-z);
double dfac[3]; T dfac[3];
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
dfac[k] = ( (dshapes(vi1,k)+dshapes(vi2,k)) * (1-z) - dfac[k] = ( (dshapes(vi1,k)+dshapes(vi2,k)) * (1-z) -
(shapes[vi1]+shapes[vi2]) *(-dshapes(4,k)) ) (shapes[vi1]+shapes[vi2]) *(-dshapes(4,k)) )
@ -3059,8 +3076,8 @@ namespace netgen
for (int j = 0; j < eorder-1; j++) for (int j = 0; j < eorder-1; j++)
{ {
double ddx = dshapes(ii+j,0); T ddx = dshapes(ii+j,0);
double ddt = dshapes(ii+j,1); T ddt = dshapes(ii+j,1);
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
dshapes(ii+j,k) = fac * (ddx * (dsigma[vi1][k]-dsigma[vi2][k]) - ddt*dz[k]) dshapes(ii+j,k) = fac * (ddx * (dsigma[vi1][k]-dsigma[vi2][k]) - ddt*dz[k])
+ dfac[k] * shapei(j); + dfac[k] * shapei(j);
@ -3075,11 +3092,10 @@ namespace netgen
case HEX: case HEX:
{ {
dshapes = 0.0; // NgProfiler::StartTimer(timer);
T x = xi(0);
double x = xi(0); T y = xi(1);
double y = xi(1); T z = xi(2);
double z = xi(2);
// shapes[0] = (1-x)*(1-y)*(1-z); // shapes[0] = (1-x)*(1-y)*(1-z);
dshapes(0,0) = - (1-y)*(1-z); dshapes(0,0) = - (1-y)*(1-z);
@ -3121,10 +3137,11 @@ namespace netgen
dshapes(7,1) = (1-x) * z; dshapes(7,1) = (1-x) * z;
dshapes(7,2) = (1-x) * y; dshapes(7,2) = (1-x) * y;
// NgProfiler::StopTimer(timer);
if (info.order == 1) return; if (info.order == 1) return;
double shapes[8] = { T shapes[8] = {
(1-x)*(1-y)*(1-z), (1-x)*(1-y)*(1-z),
x *(1-y)*(1-z), x *(1-y)*(1-z),
x * y *(1-z), x * y *(1-z),
@ -3135,7 +3152,7 @@ namespace netgen
(1-x)* y *(z), (1-x)* y *(z),
}; };
double mu[8] = { T mu[8] = {
(1-x)+(1-y)+(1-z), (1-x)+(1-y)+(1-z),
x +(1-y)+(1-z), x +(1-y)+(1-z),
x + y +(1-z), x + y +(1-z),
@ -3146,7 +3163,7 @@ namespace netgen
(1-x)+ y +(z) (1-x)+ y +(z)
}; };
double dmu[8][3] = { T dmu[8][3] = {
{ -1, -1, -1 }, { -1, -1, -1 },
{ 1, -1, -1 }, { 1, -1, -1 },
{ 1, 1, -1 }, { 1, 1, -1 },
@ -3157,7 +3174,7 @@ namespace netgen
{ -1, 1, 1 } { -1, 1, 1 }
}; };
ArrayMem<double, 20> hshapes(order+1), hdshapes(order+1); ArrayMem<T, 20> hshapes(order+1), hdshapes(order+1);
int ii = 8; int ii = 8;
const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (HEX); const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (HEX);
@ -3172,8 +3189,8 @@ namespace netgen
CalcEdgeShapeDx (eorder, mu[vi1]-mu[vi2], &hshapes[0], &hdshapes[0]); CalcEdgeShapeDx (eorder, mu[vi1]-mu[vi2], &hshapes[0], &hdshapes[0]);
double lame = shapes[vi1]+shapes[vi2]; T lame = shapes[vi1]+shapes[vi2];
double dlame[3] = { T dlame[3] = {
dshapes(vi1, 0) + dshapes(vi2, 0), dshapes(vi1, 0) + dshapes(vi2, 0),
dshapes(vi1, 1) + dshapes(vi2, 1), dshapes(vi1, 1) + dshapes(vi2, 1),
dshapes(vi1, 2) + dshapes(vi2, 2) dshapes(vi1, 2) + dshapes(vi2, 2)
@ -3779,36 +3796,43 @@ namespace netgen
template <typename T>
void CurvedElements :: void CurvedElements ::
CalcMultiPointElementTransformation (ElementIndex elnr, int n, CalcMultiPointElementTransformation (ElementIndex elnr, int n,
const double * xi, size_t sxi, const T * xi, size_t sxi,
double * x, size_t sx, T * x, size_t sx,
double * dxdxi, size_t sdxdxi) T * dxdxi, size_t sdxdxi)
{ {
// static int timer = NgProfiler::CreateTimer ("calcmultipointtrafo, calcshape"); static int timer = NgProfiler::CreateTimer ("calcmultipointelementtrafo");
static int timer1 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 1");
static int timer2 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 2");
static int timer3 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 3");
static int timer4 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 4");
static int timer5 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 5");
// NgProfiler::StartTimer (timer);
// NgProfiler::StartTimer (timer1);
if (mesh.coarsemesh) if (mesh.coarsemesh)
{ {
const HPRefElement & hpref_el = const HPRefElement & hpref_el =
(*mesh.hpelements) [mesh[elnr].hp_elnr]; (*mesh.hpelements) [mesh[elnr].hp_elnr];
// xi umrechnen // xi umrechnen
double lami[8]; T lami[8];
FlatVector vlami(8, lami); TFlatVector<T> vlami(8, &lami[0]);
ArrayMem<double, 100> coarse_xi (3*n); ArrayMem<T, 100> coarse_xi (3*n);
for (int pi = 0; pi < n; pi++) for (int pi = 0; pi < n; pi++)
{ {
vlami = 0; vlami = 0;
Point<3> pxi; Point<3,T> pxi;
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
pxi(j) = xi[pi*sxi+j]; pxi(j) = xi[pi*sxi+j];
mesh[elnr].GetShapeNew (pxi, vlami); mesh[elnr].GetShapeNew (pxi, vlami);
Point<3> cxi(0,0,0); Point<3,T> cxi(0,0,0);
for (int i = 0; i < hpref_el.np; i++) for (int i = 0; i < hpref_el.np; i++)
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
cxi(j) += hpref_el.param[i][j] * lami[i]; cxi(j) += hpref_el.param[i][j] * lami[i];
@ -3823,15 +3847,15 @@ namespace netgen
x, sx, x, sx,
dxdxi, sdxdxi); dxdxi, sdxdxi);
Mat<3,3> trans, dxdxic; Mat<3,3,T> trans, dxdxic;
if (dxdxi) if (dxdxi)
{ {
MatrixFixWidth<3> dlami(8); MatrixFixWidth<3,T> dlami(8);
dlami = 0; dlami = T(0);
for (int pi = 0; pi < n; pi++) for (int pi = 0; pi < n; pi++)
{ {
Point<3> pxi; Point<3,T> pxi;
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
pxi(j) = xi[pi*sxi+j]; pxi(j) = xi[pi*sxi+j];
@ -3843,7 +3867,7 @@ namespace netgen
for (int i = 0; i < hpref_el.np; i++) for (int i = 0; i < hpref_el.np; i++)
trans(l,k) += hpref_el.param[i][l] * dlami(i, k); trans(l,k) += hpref_el.param[i][l] * dlami(i, k);
Mat<3> mat_dxdxic, mat_dxdxi; Mat<3,3,T> mat_dxdxic, mat_dxdxi;
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
mat_dxdxic(j,k) = dxdxi[pi*sdxdxi+3*j+k]; mat_dxdxic(j,k) = dxdxi[pi*sdxdxi+3*j+k];
@ -3863,10 +3887,8 @@ namespace netgen
// NgProfiler::StopTimer (timer1);
// NgProfiler::StartTimer (timer2);
MatrixFixWidth<3> dshapes;
const Element & el = mesh[elnr]; const Element & el = mesh[elnr];
@ -3896,29 +3918,43 @@ namespace netgen
// info.ndof += facecoeffsindex[info.facenr+1] - facecoeffsindex[info.facenr]; // info.ndof += facecoeffsindex[info.facenr+1] - facecoeffsindex[info.facenr];
} }
// NgProfiler::StopTimer (timer2);
// NgProfiler::StartTimer (timer3);
ArrayMem<Vec<3>,100> coefs(info.ndof); ArrayMem<Vec<3>,100> coefs(info.ndof);
ArrayMem<double,100> shapes_mem(info.ndof); ArrayMem<T,2000> shapes_mem(info.ndof);
Vector shapes(info.ndof, &shapes_mem[0]);
TFlatVector<T> shapes(info.ndof, &shapes_mem[0]);
ArrayMem<T,6000> dshapes_mem(3*info.ndof);
MatrixFixWidth<3,T> dshapes(info.ndof, &dshapes_mem[0]);
// NgProfiler::StopTimer (timer3);
// NgProfiler::StartTimer (timer4);
GetCoefficients (info, &coefs[0]); GetCoefficients (info, &coefs[0]);
if (x) if (x)
{ {
for (int j = 0; j < n; j++) for (int j = 0; j < n; j++)
{ {
Point<3> xij, xj; Point<3,T> xij, xj;
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
xij(k) = xi[j*sxi+k]; xij(k) = xi[j*sxi+k];
CalcElementShapes (info, xij, shapes); CalcElementShapes (info, xij, shapes);
xj = 0; xj = T(0.0);
for (int i = 0; i < coefs.Size(); i++) for (int i = 0; i < coefs.Size(); i++)
xj += shapes(i) * coefs[i]; for (int k = 0; k < 3; k++)
xj(k) += shapes(i) * coefs[i](k);
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
x[j*sx+k] = xj(k); x[j*sx+k] = xj(k);
} }
} }
// NgProfiler::StopTimer (timer4);
// NgProfiler::StartTimer (timer5);
if (dxdxi) if (dxdxi)
{ {
if (info.order == 1 && type == TET) if (info.order == 1 && type == TET)
@ -3926,13 +3962,13 @@ namespace netgen
if (n > 0) if (n > 0)
{ {
Point<3> xij; Point<3,T> xij;
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
xij(k) = xi[k]; xij(k) = xi[k];
CalcElementDShapes (info, xij, dshapes); CalcElementDShapes (info, xij, dshapes);
Mat<3> dxdxij; Mat<3,3,T> dxdxij;
dxdxij = 0.0; dxdxij = 0.0;
for (int i = 0; i < coefs.Size(); i++) for (int i = 0; i < coefs.Size(); i++)
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
@ -3950,36 +3986,94 @@ namespace netgen
{ {
for (int ip = 0; ip < n; ip++) for (int ip = 0; ip < n; ip++)
{ {
Point<3> xij; Point<3,T> xij;
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
xij(k) = xi[ip*sxi+k]; xij(k) = xi[ip*sxi+k];
// just for testing ...
dshapes = T(0.0);
const Element & el = mesh[info.elnr];
if (el.GetType() != HEX)
throw NgException ("not a hex");
CalcElementDShapes (info, xij, dshapes); CalcElementDShapes (info, xij, dshapes);
Mat<3> dxdxij; /*
Mat<3,3,T> dxdxij;
dxdxij = 0.0; dxdxij = 0.0;
for (int i = 0; i < coefs.Size(); i++) for (int i = 0; i < coefs.Size(); i++)
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
dxdxij(j,k) += dshapes(i,k) * coefs[i](j); dxdxij(j,k) += dshapes(i,k) * coefs[i](j);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
for (int k = 0; k < 3; k++) for (int k = 0; k < 3; k++)
dxdxi[ip*sdxdxi+3*j+k] = dxdxij(j,k); dxdxi[ip*sdxdxi+3*j+k] = dxdxij(j,k);
*/
T dxdxi00 = T(0.0);
T dxdxi01 = T(0.0);
T dxdxi02 = T(0.0);
T dxdxi10 = T(0.0);
T dxdxi11 = T(0.0);
T dxdxi12 = T(0.0);
T dxdxi20 = T(0.0);
T dxdxi21 = T(0.0);
T dxdxi22 = T(0.0);
for (int i = 0; i < coefs.Size(); i++)
{
T ds0 = dshapes(i,0);
T ds1 = dshapes(i,1);
T ds2 = dshapes(i,2);
T cf0 = coefs[i](0);
T cf1 = coefs[i](1);
T cf2 = coefs[i](2);
dxdxi00 += ds0*cf0;
dxdxi01 += ds1*cf0;
dxdxi02 += ds2*cf0;
dxdxi10 += ds0*cf1;
dxdxi11 += ds1*cf1;
dxdxi12 += ds2*cf1;
dxdxi20 += ds0*cf2;
dxdxi21 += ds1*cf2;
dxdxi22 += ds2*cf2;
}
dxdxi[ip*sdxdxi+3*0+0] = dxdxi00;
dxdxi[ip*sdxdxi+3*0+1] = dxdxi01;
dxdxi[ip*sdxdxi+3*0+2] = dxdxi02;
dxdxi[ip*sdxdxi+3*1+0] = dxdxi10;
dxdxi[ip*sdxdxi+3*1+1] = dxdxi11;
dxdxi[ip*sdxdxi+3*1+2] = dxdxi12;
dxdxi[ip*sdxdxi+3*2+0] = dxdxi20;
dxdxi[ip*sdxdxi+3*2+1] = dxdxi21;
dxdxi[ip*sdxdxi+3*2+2] = dxdxi22;
} }
} }
} }
// NgProfiler::StopTimer (timer5);
// NgProfiler::StopTimer (timer);
} }
template
void CurvedElements ::
CalcMultiPointElementTransformation
(ElementIndex elnr, int n,
const double * xi, size_t sxi,
double * x, size_t sx,
double * dxdxi, size_t sdxdxi);
template
void CurvedElements ::
CalcMultiPointElementTransformation
(ElementIndex elnr, int n,
const SIMD<double> * xi, size_t sxi,
SIMD<double> * x, size_t sx,
SIMD<double> * dxdxi, size_t sdxdxi);
}; };

View File

@ -141,10 +141,11 @@ public:
Array< Point<3> > * x, Array< Point<3> > * x,
Array< Mat<3,3> > * dxdxi); Array< Mat<3,3> > * dxdxi);
template <typename T>
void CalcMultiPointElementTransformation (ElementIndex elnr, int n, void CalcMultiPointElementTransformation (ElementIndex elnr, int n,
const double * xi, size_t sxi, const T * xi, size_t sxi,
double * x, size_t sx, T * x, size_t sx,
double * dxdxi, size_t sdxdxi); T * dxdxi, size_t sdxdxi);
@ -196,10 +197,11 @@ private:
Vec<3> hcoefs[10]; // enough for second order tets Vec<3> hcoefs[10]; // enough for second order tets
}; };
template <typename T>
void CalcElementShapes (ElementInfo & info, const Point<3> & xi, Vector & shapes) const; void CalcElementShapes (ElementInfo & info, Point<3,T> xi, TFlatVector<T> shapes) const;
void GetCoefficients (ElementInfo & info, Vec<3> * coefs) const; void GetCoefficients (ElementInfo & info, Vec<3> * coefs) const;
void CalcElementDShapes (ElementInfo & info, const Point<3> & xi, MatrixFixWidth<3> & dshapes) const; template <typename T>
void CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const;
class SurfaceElementInfo class SurfaceElementInfo

View File

@ -1965,9 +1965,9 @@ namespace netgen
} }
} }
template <typename T>
void Element :: void Element ::
GetDShapeNew (const Point<3> & p, MatrixFixWidth<3> & dshape) const GetDShapeNew (const Point<3,T> & p, MatrixFixWidth<3,T> & dshape) const
{ {
switch (typ) switch (typ)
{ {

View File

@ -125,7 +125,6 @@ namespace netgen
PointIndex operator-- (int) { PointIndex hi(*this); i--; return hi; } PointIndex operator-- (int) { PointIndex hi(*this); i--; return hi; }
PointIndex operator++ () { i++; return *this; } PointIndex operator++ () { i++; return *this; }
PointIndex operator-- () { i--; return *this; } PointIndex operator-- () { i--; return *this; }
#ifdef BASE0 #ifdef BASE0
enum { BASE = 0 }; enum { BASE = 0 };
#else #else
@ -718,9 +717,12 @@ namespace netgen
void GetShape (const Point<3> & p, class Vector & shape) const; void GetShape (const Point<3> & p, class Vector & shape) const;
void GetShapeNew (const Point<3> & p, class FlatVector & shape) const; void GetShapeNew (const Point<3> & p, class FlatVector & shape) const;
template <typename T>
void GetShapeNew (const Point<3,T> & p, TFlatVector<T> shape) const;
/// matrix 2 * np /// matrix 2 * np
void GetDShape (const Point<3> & p, class DenseMatrix & dshape) const; void GetDShape (const Point<3> & p, class DenseMatrix & dshape) const;
void GetDShapeNew (const Point<3> & p, class MatrixFixWidth<3> & dshape) const; template <typename T>
void GetDShapeNew (const Point<3,T> & p, class MatrixFixWidth<3,T> & dshape) const;
/// matrix 3 * np /// matrix 3 * np
void GetPointMatrix (const T_POINTS & points, void GetPointMatrix (const T_POINTS & points,
class DenseMatrix & pmat) const; class DenseMatrix & pmat) const;