From 8414cb2d60f0c9388edf04519be0636e8f9ab019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Wed, 6 Jul 2016 17:12:57 +0100 Subject: [PATCH] using simds for element-trafo, added type-template to many classes --- libsrc/general/array.hpp | 4 +- libsrc/general/myadt.hpp | 3 + libsrc/general/mysimd.hpp | 358 ++++++++++++++++++++++ libsrc/gprim/geomobjects.hpp | 80 ++--- libsrc/gprim/geomops.hpp | 8 +- libsrc/interface/nginterface_v2.cpp | 8 + libsrc/linalg/densemat.hpp | 116 +++++++- libsrc/linalg/vector.hpp | 41 +++ libsrc/meshing/curvedelems.cpp | 444 +++++++++++++++++----------- libsrc/meshing/curvedelems.hpp | 14 +- libsrc/meshing/meshtype.cpp | 4 +- libsrc/meshing/meshtype.hpp | 6 +- 12 files changed, 854 insertions(+), 232 deletions(-) create mode 100644 libsrc/general/mysimd.hpp diff --git a/libsrc/general/array.hpp b/libsrc/general/array.hpp index 8cf9ee19..1429f2c8 100644 --- a/libsrc/general/array.hpp +++ b/libsrc/general/array.hpp @@ -416,9 +416,9 @@ namespace netgen using Array::data; using Array::ownmem; - // T mem[S]; // Intel C++ calls dummy constructor + T mem[S]; // Intel C++ calls dummy constructor // char mem[S*sizeof(T)]; - double mem[(S*sizeof(T)+7) / 8]; + // double mem[(S*sizeof(T)+7) / 8]; public: /// Generate array of logical and physical size asize explicit ArrayMem(int asize = 0) diff --git a/libsrc/general/myadt.hpp b/libsrc/general/myadt.hpp index c5b9c5ec..8fa8b95d 100644 --- a/libsrc/general/myadt.hpp +++ b/libsrc/general/myadt.hpp @@ -46,4 +46,7 @@ #include "gzstream.h" #include "archive_base.hpp" +#include "mysimd.hpp" + + #endif diff --git a/libsrc/general/mysimd.hpp b/libsrc/general/mysimd.hpp new file mode 100644 index 00000000..ed67dbec --- /dev/null +++ b/libsrc/general/mysimd.hpp @@ -0,0 +1,358 @@ +#ifndef FILE_MYSIMD +#define FILE_MYSIMD + +/**************************************************************************/ +/* File: mysimd.hpp */ +/* Author: Joachim Schoeberl */ +/* Date: 25. Mar. 16 */ +/**************************************************************************/ + +#include + +#ifdef WIN32 +inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); } +inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); } +inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); } +inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); } +inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); } +inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; } +inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; } + +inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; } +inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; } +inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; } +inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; } + +inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); } +inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); } +inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); } +inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); } +inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); } +inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; } +inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; } + +inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; } +inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; } +inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; } +inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; } +#endif + + + +namespace netgen +{ + template class SIMD; + + template + struct has_call_operator + { + template static std::true_type check( decltype( sizeof(&C::operator() )) ) { return std::true_type(); } + template static std::false_type check(...) { return std::false_type(); } + typedef decltype( check(sizeof(char)) ) type; + static constexpr type value = type(); + }; + +#ifdef __AVX2__ + + template<> + class alignas(32) SIMD + { + __m256d data; + + public: + static constexpr int Size() { return 4; } + SIMD () = default; + SIMD (const SIMD &) = default; + SIMD & operator= (const SIMD &) = default; + + SIMD (double val) + { + data = _mm256_set1_pd(val); + } + + SIMD (__m256d adata) + : data(adata) + { ; } + + /* + template + SIMD (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + } + */ + + /* + template + SIMD & operator= (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + return *this; + } + */ + + + template + void SIMD_function (const Function & func, std::true_type) + { + data = _mm256_set_pd(func(3), func(2), func(1), func(0)); + } + + // not a function + void SIMD_function (double const * p, std::false_type) + { + data = _mm256_loadu_pd(p); + } + + void SIMD_function (double val, std::false_type) + { + data = _mm256_set1_pd(val); + } + + void SIMD_function (__m256d _data, std::false_type) + { + data = _data; + } + + inline double operator[] (int i) const { return ((double*)(&data))[i]; } + inline __m256d Data() const { return data; } + inline __m256d & Data() { return data; } + }; + + + inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } + inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } + inline SIMD operator- (SIMD a) { return -a.Data(); } + inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } + inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } + inline SIMD operator* (double a, SIMD b) { return SIMD(a)*b; } + inline SIMD operator* (SIMD b, double a) { return SIMD(a)*b; } + inline SIMD operator+= (SIMD & a, SIMD b) { return a.Data()+=b.Data(); } + inline SIMD operator-= (SIMD & a, SIMD b) { return a.Data()-=b.Data(); } + inline SIMD operator*= (SIMD & a, SIMD b) { return a.Data()*=b.Data(); } + inline SIMD operator/= (SIMD & a, SIMD b) { return a.Data()/=b.Data(); } + + using std::sqrt; + using std::fabs; + + inline SIMD sqrt (SIMD a) { return _mm256_sqrt_pd(a.Data()); } + inline SIMD fabs (SIMD a) { return _mm256_max_pd(a.Data(), -a.Data()); } + inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } + inline SIMD Trans (SIMD a) { return a; } + inline SIMD IfPos (SIMD a, SIMD b, SIMD c) + { + auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS); + return _mm256_blendv_pd(c.Data(), b.Data(), cp); + } + + inline double HSum (SIMD sd) + { + __m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1)); + return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv)); + } + + + + +#else + + template<> + class SIMD + { + double data; + + public: + static constexpr int Size() { return 1; } + SIMD () = default; + SIMD (const SIMD &) = default; + SIMD & operator= (const SIMD &) = default; + + template + SIMD (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + } + + template + SIMD & operator= (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + return *this; + } + + template + void SIMD_function (const Function & func, std::true_type) + { + data = func(0); + } + + // not a function + void SIMD_function (double const * p, std::false_type) + { + data = *p; + } + + void SIMD_function (double val, std::false_type) + { + data = val; + } + + double operator[] (int i) const { return ((double*)(&data))[i]; } + double Data() const { return data; } + double & Data() { return data; } + }; + + + inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } + inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } + inline SIMD operator- (SIMD a) { return -a.Data(); } + inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } + inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } + inline SIMD operator* (double a, SIMD b) { return SIMD(a)*b; } + inline SIMD operator* (SIMD b, double a) { return SIMD(a)*b; } + inline SIMD operator+= (SIMD & a, SIMD b) { return a.Data()+=b.Data(); } + inline SIMD operator-= (SIMD & a, SIMD b) { return a.Data()-=b.Data(); } + inline SIMD operator*= (SIMD & a, SIMD b) { return a.Data()*=b.Data(); } + inline SIMD operator/= (SIMD & a, SIMD b) { return a.Data()/=b.Data(); } + + inline SIMD sqrt (SIMD a) { return std::sqrt(a.Data()); } + inline SIMD fabs (SIMD a) { return std::fabs(a.Data()); } + inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } + inline SIMD Trans (SIMD a) { return a; } + inline SIMD IfPos (SIMD a, SIMD b, SIMD c) + { + return (a.Data() > 0) ? b : c; + } + + inline double HSum (SIMD sd) + { return sd.Data(); } + +#endif + + + + + + + + template + ostream & operator<< (ostream & ost, SIMD simd) + { + ost << simd[0]; + for (int i = 1; i < simd.Size(); i++) + ost << " " << simd[i]; + return ost; + } + + /* + using std::exp; + inline netgen::SIMD exp (netgen::SIMD a) { + return netgen::SIMD([&](int i)->double { return exp(a[i]); } ); +} + + using std::log; +inline netgen::SIMD log (netgen::SIMD a) { + return netgen::SIMD([&](int i)->double { return log(a[i]); } ); +} + + using std::pow; +inline netgen::SIMD pow (netgen::SIMD a, double x) { + return netgen::SIMD([&](int i)->double { return pow(a[i],x); } ); +} + */ + + + template + class MultiSIMD + { + SIMD head; + MultiSIMD tail; + public: + MultiSIMD () = default; + MultiSIMD (const MultiSIMD & ) = default; + MultiSIMD (T v) : head(v), tail(v) { ; } + MultiSIMD (SIMD _head, MultiSIMD _tail) + : head(_head), tail(_tail) { ; } + SIMD Head() const { return head; } + MultiSIMD Tail() const { return tail; } + SIMD & Head() { return head; } + MultiSIMD & Tail() { return tail; } + + template + SIMD Get() const { return NR==0 ? head : tail.template Get(); } + template + SIMD & Get() { return NR==0 ? head : tail.template Get(); } + }; + + template + class MultiSIMD<2,T> + { + SIMD v0, v1; + public: + MultiSIMD () = default; + MultiSIMD (const MultiSIMD & ) = default; + MultiSIMD (T v) : v0(v), v1(v) { ; } + MultiSIMD (SIMD _v0, SIMD _v1) : v0(_v0), v1(_v1) { ; } + + SIMD Head() const { return v0; } + SIMD Tail() const { return v1; } + SIMD & Head() { return v0; } + SIMD & Tail() { return v1; } + + template + SIMD Get() const { return NR==0 ? v0 : v1; } + template + SIMD & Get() { return NR==0 ? v0 : v1; } + }; + + template inline MultiSIMD operator+ (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()+b.Head(), a.Tail()+b.Tail()); } + template inline MultiSIMD operator+ (double a, MultiSIMD b) + { return MultiSIMD (a+b.Head(), a+b.Tail()); } + template inline MultiSIMD operator+ (MultiSIMD b, double a) + { return MultiSIMD (a+b.Head(), a+b.Tail()); } + + template inline MultiSIMD operator- (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()-b.Head(), a.Tail()-b.Tail()); } + template inline MultiSIMD operator- (double a, MultiSIMD b) + { return MultiSIMD (a-b.Head(), a-b.Tail()); } + template inline MultiSIMD operator- (MultiSIMD b, double a) + { return MultiSIMD (b.Head()-a, b.Tail()-a); } + template inline MultiSIMD operator- (MultiSIMD a) + { return MultiSIMD (-a.Head(), -a.Tail()); } + template inline MultiSIMD operator* (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()*b.Head(), a.Tail()*b.Tail()); } + template inline MultiSIMD operator/ (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()/b.Head(), a.Tail()/b.Tail()); } + template inline MultiSIMD operator* (double a, MultiSIMD b) + { return MultiSIMD ( a*b.Head(), a*b.Tail()); } + template inline MultiSIMD operator* (MultiSIMD b, double a) + { return MultiSIMD ( a*b.Head(), a*b.Tail()); } + + template inline MultiSIMD & operator+= (MultiSIMD & a, MultiSIMD b) + { a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; } + template inline MultiSIMD operator-= (MultiSIMD & a, double b) + { a.Head()-=b; a.Tail()-=b; return a; } + template inline MultiSIMD operator-= (MultiSIMD & a, MultiSIMD b) + { a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; } + template inline MultiSIMD & operator*= (MultiSIMD & a, MultiSIMD b) + { a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; } + template inline MultiSIMD & operator*= (MultiSIMD & a, double b) + { a.Head()*=b; a.Tail()*=b; return a; } + // inline MultiSIMD operator/= (MultiSIMD & a, MultiSIMD b) { return a.Data()/=b.Data(); } + + + template + ostream & operator<< (ostream & ost, MultiSIMD multi) + { + ost << multi.Head() << " " << multi.Tail(); + return ost; + } + +} + +#endif diff --git a/libsrc/gprim/geomobjects.hpp b/libsrc/gprim/geomobjects.hpp index ea80f7ff..d4ca640f 100644 --- a/libsrc/gprim/geomobjects.hpp +++ b/libsrc/gprim/geomobjects.hpp @@ -12,31 +12,31 @@ namespace netgen { - template class Vec; - template class Point; + template class Vec; + template class Point; - template + template class Point { protected: - double x[D]; + T x[D]; public: Point () { ; } - Point (double ax) { for (int i = 0; i < D; i++) x[i] = ax; } - Point (double ax, double ay) + Point (T ax) { for (int i = 0; i < D; i++) x[i] = ax; } + Point (T ax, T ay) { // static_assert(D==2, "Point constructor with 2 args called"); x[0] = ax; x[1] = ay; } - Point (double ax, double ay, double az) + Point (T ax, T ay, T az) { // static_assert(D==3, "Point constructor with 3 args called"); x[0] = ax; x[1] = ay; x[2] = az; } - Point (double ax, double ay, double az, double au) + Point (T ax, T ay, T az, T au) { x[0] = ax; x[1] = ay; x[2] = az; x[3] = au;} Point (const Point & p2) @@ -52,39 +52,39 @@ namespace netgen return *this; } - Point & operator= (double val) + Point & operator= (T val) { for (int i = 0; i < D; i++) x[i] = val; return *this; } - double & operator() (int i) { return x[i]; } - const double & operator() (int i) const { return x[i]; } + T & operator() (int i) { return x[i]; } + const T & operator() (int i) const { return x[i]; } - operator const double* () const { return x; } + operator const T* () const { return x; } }; - template + template class Vec { protected: - double x[D]; + T x[D]; public: Vec () { ; } // for (int i = 0; i < D; i++) x[i] = 0; } - Vec (double ax) { for (int i = 0; i < D; i++) x[i] = ax; } - Vec (double ax, double ay) + Vec (T ax) { for (int i = 0; i < D; i++) x[i] = ax; } + Vec (T ax, T ay) { // static_assert(D==2, "Vec constructor with 2 args called"); x[0] = ax; x[1] = ay; } - Vec (double ax, double ay, double az) + Vec (T ax, T ay, T az) { // static_assert(D==3, "Vec constructor with 3 args called"); x[0] = ax; x[1] = ay; x[2] = az; } - Vec (double ax, double ay, double az, double au) + Vec (T ax, T ay, T az, T au) { x[0] = ax; x[1] = ay; x[2] = az; x[3] = au; } Vec (const Vec & p2) @@ -104,28 +104,28 @@ namespace netgen return *this; } - Vec & operator= (double s) + Vec & operator= (T s) { for (int i = 0; i < D; i++) x[i] = s; return *this; } - double & operator() (int i) { return x[i]; } - const double & operator() (int i) const { return x[i]; } + T & operator() (int i) { return x[i]; } + const T & operator() (int i) const { return x[i]; } - operator const double* () const { return x; } + operator const T* () const { return x; } - double Length () const + T Length () const { - double l = 0; + T l = 0; for (int i = 0; i < D; i++) l += x[i] * x[i]; return sqrt (l); } - double Length2 () const + T Length2 () const { - double l = 0; + T l = 0; for (int i = 0; i < D; i++) l += x[i] * x[i]; return l; @@ -133,7 +133,7 @@ namespace netgen const Vec & Normalize () { - double l = Length(); + T l = Length(); if (l != 0) for (int i = 0; i < D; i++) x[i] /= l; @@ -147,19 +147,19 @@ namespace netgen - template + template class Mat { protected: - double x[H*W]; + T x[H*W]; public: Mat () { ; } Mat (const Mat & b) { for (int i = 0; i < H*W; i++) x[i] = b.x[i]; } - Mat & operator= (double s) + Mat & operator= (T s) { for (int i = 0; i < H*W; i++) x[i] = s; return *this; @@ -171,30 +171,30 @@ namespace netgen return *this; } - double & operator() (int i, int j) { return x[i*W+j]; } - const double & operator() (int i, int j) const { return x[i*W+j]; } - double & operator() (int i) { return x[i]; } - const double & operator() (int i) const { return x[i]; } + T & operator() (int i, int j) { return x[i*W+j]; } + const T & operator() (int i, int j) const { return x[i*W+j]; } + T & operator() (int i) { return x[i]; } + const T & operator() (int i) const { return x[i]; } - Vec Col (int i) const + Vec Col (int i) const { - Vec hv; + Vec hv; for (int j = 0; j < H; j++) hv(j) = x[j*W+i]; return hv; } - Vec Row (int i) const + Vec Row (int i) const { - Vec hv; + Vec hv; for (int j = 0; j < W; j++) hv(j) = x[i*W+j]; return hv; } - void Solve (const Vec & rhs, Vec & sol) const + void Solve (const Vec & rhs, Vec & sol) const { - Mat inv; + Mat inv; CalcInverse (*this, inv); sol = inv * rhs; } diff --git a/libsrc/gprim/geomops.hpp b/libsrc/gprim/geomops.hpp index 90c3cb6b..45614c6b 100644 --- a/libsrc/gprim/geomops.hpp +++ b/libsrc/gprim/geomops.hpp @@ -306,14 +306,14 @@ namespace netgen return m; } - - inline Mat<3,3> operator* (const Mat<3,3> & a, const Mat<3,3> & b) + template + inline Mat<3,3,T> operator* (const Mat<3,3,T> & a, const Mat<3,3,T> & b) { - Mat<3,3> m; + Mat<3,3,T> m; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) { - double sum = 0; + T sum = T(0); for (int k = 0; k < 3; k++) sum += a(i,k) * b(k, j); m(i,j) = sum; diff --git a/libsrc/interface/nginterface_v2.cpp b/libsrc/interface/nginterface_v2.cpp index b39698ef..56e34ce2 100644 --- a/libsrc/interface/nginterface_v2.cpp +++ b/libsrc/interface/nginterface_v2.cpp @@ -645,6 +645,13 @@ namespace netgen __m256d * x, size_t sx, __m256d * dxdxi, size_t sdxdxi) const { + mesh->GetCurvedElements().CalcMultiPointElementTransformation + (elnr, npts, + reinterpret_cast*> (xi), sxi, + reinterpret_cast*> (x), sx, + reinterpret_cast*> (dxdxi), sdxdxi); + + /* for (int i = 0; i < npts; i++) { double hxi[4][3]; @@ -665,6 +672,7 @@ namespace netgen x += sx; dxdxi += sdxdxi; } + */ } template<> DLL_HEADER void Ngx_Mesh :: diff --git a/libsrc/linalg/densemat.hpp b/libsrc/linalg/densemat.hpp index 28ca59ba..9b007202 100644 --- a/libsrc/linalg/densemat.hpp +++ b/libsrc/linalg/densemat.hpp @@ -152,9 +152,120 @@ extern ostream & operator<< (ostream & ost, const DenseMatrix & m); -template +template class MatrixFixWidth { +protected: + int height; + T * __restrict data; + bool ownmem; +public: + /// + MatrixFixWidth () + { height = 0; data = 0; ownmem = false; } + /// + MatrixFixWidth (int h) + { height = h; data = new T[WIDTH*height]; ownmem = true; } + /// + MatrixFixWidth (int h, T * adata) + { height = h; data = adata; ownmem = false; } + /// + MatrixFixWidth (const MatrixFixWidth & m2) + : height(m2.height), data(m2.data), ownmem(false) + { ; } + /// + ~MatrixFixWidth () + { if (ownmem) delete [] data; } + + void SetSize (int h) + { + if (h != height) + { + if (ownmem) delete data; + height = h; + data = new T[WIDTH*height]; + ownmem = true; + } + } + + /// + int Height() const { return height; } + + /// + int Width() const { return WIDTH; } + MatrixFixWidth & operator= (const MatrixFixWidth & m2) + { + for (int i = 0; i < height*WIDTH; i++) + data[i] = m2.data[i]; + } + /// + MatrixFixWidth & operator= (T v) + { + for (int i = 0; i < height*WIDTH; i++) + data[i] = v; + return *this; + } + + /* + /// + void Mult (const FlatVector & v, FlatVector & prod) const + { + T sum; + const T * mp, * sp; + T * dp; + + mp = data; + dp = &prod[0]; + for (int i = 0; i < height; i++) + { + sum = 0; + sp = &v[0]; + + for (int j = 0; j < WIDTH; j++) + { + sum += *mp * *sp; + mp++; + sp++; + } + + *dp = sum; + dp++; + } + } + */ + + T & operator() (int i, int j) + { return data[i*WIDTH+j]; } + + const T & operator() (int i, int j) const + { return data[i*WIDTH+j]; } + + + MatrixFixWidth & operator*= (T v) + { + if (data) + for (int i = 0; i < height*WIDTH; i++) + data[i] *= v; + return *this; + } + + + + const T & Get(int i, int j) const { return data[(i-1)*WIDTH+j-1]; } + /// + const T & Get(int i) const { return data[i-1]; } + /// + void Set(int i, int j, T v) { data[(i-1)*WIDTH+j-1] = v; } + /// + T & Elem(int i, int j) { return data[(i-1)*WIDTH+j-1]; } + /// + const T & ConstElem(int i, int j) const { return data[(i-1)*WIDTH+j-1]; } +}; + + +template +class MatrixFixWidth +{ protected: int height; double * data; @@ -261,6 +372,9 @@ public: }; + + + template extern ostream & operator<< (ostream & ost, const MatrixFixWidth & m) { diff --git a/libsrc/linalg/vector.hpp b/libsrc/linalg/vector.hpp index e7c52e81..1b103d0e 100644 --- a/libsrc/linalg/vector.hpp +++ b/libsrc/linalg/vector.hpp @@ -79,6 +79,47 @@ public: }; +template +class TFlatVector +{ +protected: + int s; + T * data; +public: + TFlatVector () { ; } + TFlatVector (int as, T * adata) + { s = as; data = adata; } + + int Size () const + { return s; } + + TFlatVector & operator= (const TFlatVector & v) + { memcpy (data, v.data, s*sizeof(T)); return *this; } + + TFlatVector & operator= (T scal) + { + for (int i = 0; i < s; i++) data[i] = scal; + return *this; + } + + T & operator[] (int i) { return data[i]; } + const T & operator[] (int i) const { return data[i]; } + T & operator() (int i) { return data[i]; } + const T & operator() (int i) const { return data[i]; } + + // double & Elem (int i) { return data[i-1]; } + // const double & Get (int i) const { return data[i-1]; } + // void Set (int i, double val) { data[i-1] = val; } + + TFlatVector & operator*= (T scal) + { + for (int i = 0; i < s; i++) data[i] *= scal; + return *this; + } +}; + + + class Vector : public FlatVector { diff --git a/libsrc/meshing/curvedelems.cpp b/libsrc/meshing/curvedelems.cpp index f1673bb8..f5db4038 100644 --- a/libsrc/meshing/curvedelems.cpp +++ b/libsrc/meshing/curvedelems.cpp @@ -48,9 +48,10 @@ namespace netgen // compute edge bubbles up to order n, x \in (-1, 1) - static void CalcEdgeShape (int n, double x, double * shape) + template + static void CalcEdgeShape (int n, T x, T * shape) { - double p1 = x, p2 = -1, p3 = 0; + T p1 = x, p2 = -1, p3 = 0; for (int j=2; j<=n; j++) { p3=p2; p2=p1; @@ -76,10 +77,11 @@ namespace netgen } } - static void CalcEdgeShapeDx (int n, double x, double * shape, double * dshape) + template + static void CalcEdgeShapeDx (int n, T x, T * shape, T * dshape) { - double p1 = x, p2 = -1, p3 = 0; - double p1dx = 1, p2dx = 0, p3dx = 0; + T p1 = x, p2 = -1, p3 = 0; + T p1dx = 1, p2dx = 0, p3dx = 0; for (int j=2; j<=n; j++) { @@ -95,7 +97,8 @@ namespace netgen } // compute L_i(x/t) * t^i - static void CalcScaledEdgeShape (int n, double x, double t, double * shape) + template + static void CalcScaledEdgeShape (int n, T x, T t, T * shape) { static bool init = false; static double coefs[100][2]; @@ -109,8 +112,8 @@ namespace netgen init = true; } - double p1 = x, p2 = -1, p3 = 0; - double tt = t*t; + T p1 = x, p2 = -1, p3 = 0; + T tt = t*t; for (int j=0; j<=n-2; j++) { p3=p2; p2=p1; @@ -120,13 +123,13 @@ namespace netgen } } - template - static void CalcScaledEdgeShapeDxDt (int n, double x, double t, double * dshape) + template + static void CalcScaledEdgeShapeDxDt (int n, T x, T t, T * dshape) { - double p1 = x, p2 = -1, p3 = 0; - double p1dx = 1, p1dt = 0; - double p2dx = 0, p2dt = 0; - double p3dx = 0, p3dt = 0; + T p1 = x, p2 = -1, p3 = 0; + T p1dx = 1, p1dt = 0; + T p2dx = 0, p2dt = 0; + T p3dx = 0, p3dt = 0; for (int j=0; j<=n-2; j++) { @@ -225,7 +228,7 @@ namespace netgen template void Evaluate (int n, S x, T * values) { - S p1 = 1.0, p2 = 0.0, p3; + S p1(1.0), p2(0.0), p3; if (n >= 0) p2 = values[0] = 1.0; @@ -243,7 +246,7 @@ namespace netgen template void EvaluateScaled (int n, S x, S y, T * values) { - S p1 = 1.0, p2 = 0.0, p3; + S p1(1.0), p2(0.0), p3; if (n >= 0) p2 = values[0] = 1.0; @@ -314,7 +317,7 @@ namespace netgen if (n >= 0) values[0] = 1.0; */ - S p1 = 1.0, p2 = 0.0, p3; + S p1(1.0), p2(0.0), p3; if (n >= 0) p2 = values[0] = 1.0; @@ -378,14 +381,14 @@ namespace netgen shape[ii++] = hx[ix]*hy[iy+50*ix]; } - - static void CalcTrigShapeDxDy (int n, double x, double y, double * dshape) + template + static void CalcTrigShapeDxDy (int n, T x, T y, T * dshape) { if (n < 3) return; - AutoDiff<2> adx(x, 0); - AutoDiff<2> ady(y, 1); - AutoDiff<2> res[2000]; + AutoDiff<2,T> adx(x, 0); + AutoDiff<2,T> ady(y, 1); + AutoDiff<2,T> res[2000]; CalcTrigShape (n, adx, ady, &res[0]); int ndof = (n-1)*(n-2)/2; for (int i = 0; i < ndof; i++) @@ -421,13 +424,14 @@ namespace netgen // compute face bubbles up to order n, 0 < y, y-x < 1, x+y < 1 - static void CalcScaledTrigShapeDxDyDt (int n, double x, double y, double t, double * dshape) + template + static void CalcScaledTrigShapeDxDyDt (int n, T x, T y, T t, T * dshape) { if (n < 3) return; - AutoDiff<3> adx(x, 0); - AutoDiff<3> ady(y, 1); - AutoDiff<3> adt(t, 2); - AutoDiff<3> res[2000]; + AutoDiff<3,T> adx(x, 0); + AutoDiff<3,T> ady(y, 1); + AutoDiff<3,T> adt(t, 2); + AutoDiff<3,T> res[2000]; CalcScaledTrigShape (n, adx, ady, adt, &res[0]); int ndof = (n-1)*(n-2)/2; for (int i = 0; i < ndof; i++) @@ -1390,7 +1394,6 @@ namespace netgen - void CurvedElements :: CalcElementShapes (SegmentInfo & info, double xi, Vector & shapes) const { @@ -2281,9 +2284,6 @@ namespace netgen } - Vector shapes; - MatrixFixWidth<3> dshapes; - const Element & el = mesh[elnr]; ELEMENT_TYPE type = el.GetType(); @@ -2315,6 +2315,11 @@ namespace netgen } } + ArrayMem mem(info.ndof); + TFlatVector shapes(info.ndof, &mem[0]); + ArrayMem dshapes_mem(info.ndof*3); + MatrixFixWidth<3> dshapes(info.ndof, &dshapes_mem[0]); + CalcElementShapes (info, xi, shapes); Vec<3> * coefs = (info.ndof <= 10) ? @@ -2359,16 +2364,16 @@ namespace netgen - - void CurvedElements :: CalcElementShapes (ElementInfo & info, const Point<3> & xi, Vector & shapes) const + template + void CurvedElements :: CalcElementShapes (ElementInfo & info, Point<3,T> xi, TFlatVector shapes) const { const Element & el = mesh[info.elnr]; if (rational && info.order >= 2) { - shapes.SetSize(10); - double w = 1; - double lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; + // shapes.SetSize(10); + T w = 1; + T lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; for (int j = 0; j < 4; j++) shapes(j) = lami[j] * lami[j]; @@ -2384,7 +2389,7 @@ namespace netgen return; } - shapes.SetSize(info.ndof); + // shapes.SetSize(info.ndof); switch (el.GetType()) { @@ -2434,10 +2439,10 @@ namespace netgen case TET10: { - double x = xi(0); - double y = xi(1); - double z = xi(2); - double lam4 = 1 - x - y - z; + T x = xi(0); + T y = xi(1); + T z = xi(2); + T lam4 = 1 - x - y - z; /* shapes(0) = xi(0); shapes(1) = xi(1); @@ -2462,8 +2467,8 @@ namespace netgen case PRISM: { - double lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; - double lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; + T lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; + T lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; for (int i = 0; i < 6; i++) shapes(i) = lami[i] * lamiz[i]; for (int i = 6; i < info.ndof; i++) @@ -2483,7 +2488,7 @@ namespace netgen if (el[vi1] > el[vi2]) swap (vi1, vi2); CalcScaledEdgeShape (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapes(ii)); - double facz = (i < 3) ? (1-xi(2)) : xi(2); + T facz = (i < 3) ? (1-xi(2)) : xi(2); for (int j = 0; j < eorder-1; j++) shapes(ii+j) *= facz; @@ -2499,9 +2504,9 @@ namespace netgen int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1; if (el[vi1] > el[vi2]) swap (vi1, vi2); - double bubz = lamiz[vi1]*lamiz[vi2]; - double polyz = lamiz[vi1] - lamiz[vi2]; - double bubxy = lami[vi1]; + T bubz = lamiz[vi1]*lamiz[vi2]; + T polyz = lamiz[vi1] - lamiz[vi2]; + T bubxy = lami[vi1]; for (int j = 0; j < eorder-1; j++) { @@ -2538,11 +2543,12 @@ namespace netgen case PYRAMID: { shapes = 0.0; - double x = xi(0); - double y = xi(1); - double z = xi(2); + T x = xi(0); + T y = xi(1); + T z = xi(2); - if (z == 1.) z = 1-1e-10; + // if (z == 1.) z = 1-1e-10; + z *= (1-1e-12); shapes[0] = (1-z-x)*(1-z-y) / (1-z); shapes[1] = x*(1-z-y) / (1-z); shapes[2] = x*y / (1-z); @@ -2551,7 +2557,7 @@ namespace netgen if (info.order == 1) return; - double sigma[4] = + T sigma[4] = { sigma[0] = ( (1-z-x) + (1-z-y) ), sigma[1] = ( x + (1-z-y) ), @@ -2570,7 +2576,7 @@ namespace netgen if (el[vi1] > el[vi2]) swap (vi1, vi2); CalcScaledEdgeShape (eorder, sigma[vi1]-sigma[vi2], 1-z, &shapes(ii)); - double fac = (shapes[vi1]+shapes[vi2]) / (1-z); + T fac = (shapes[vi1]+shapes[vi2]) / (1-z); for (int j = 0; j < eorder-1; j++) shapes(ii+j) *= fac; @@ -2586,9 +2592,9 @@ namespace netgen case HEX: { shapes = 0.0; - double x = xi(0); - double y = xi(1); - double z = xi(2); + T x = xi(0); + T y = xi(1); + T z = xi(2); shapes[0] = (1-x)*(1-y)*(1-z); shapes[1] = x *(1-y)*(1-z); @@ -2601,7 +2607,7 @@ namespace netgen if (info.order == 1) return; - double mu[8] = { + T mu[8] = { (1-x)+(1-y)+(1-z), x +(1-y)+(1-z), x + y +(1-z), @@ -2624,7 +2630,7 @@ namespace netgen if (el[vi1] > el[vi2]) swap (vi1, vi2); CalcEdgeShape (eorder, mu[vi1]-mu[vi2], &shapes(ii)); - double lame = shapes(vi1)+shapes(vi2); + T lame = shapes(vi1)+shapes(vi2); for (int j = 0; j < order-1; j++) shapes(ii+j) *= lame; ii += eorder-1; @@ -2642,24 +2648,27 @@ namespace netgen } + template void CurvedElements :: - CalcElementDShapes (ElementInfo & info, const Point<3> & xi, MatrixFixWidth<3> & dshapes) const + CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const { + // static int timer = NgProfiler::CreateTimer ("calcelementdshapes"); + const Element & el = mesh[info.elnr]; - dshapes.SetSize(info.ndof); - dshapes = 0.0; - - - + // dshapes.SetSize(info.ndof); + if ( (long int)(&dshapes(0,0)) % alignof(T) != 0) + throw NgException ("alignment problem"); + if (dshapes.Height() != info.ndof) + throw NgException ("wrong height"); if (rational && info.order >= 2) { - double w = 1; - double dw[3] = { 0, 0, 0 }; + T w = 1; + T dw[3] = { 0, 0, 0 }; - double lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; - double dlami[4][3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { -1, -1, -1 }}; - double shapes[10]; + T lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; + T dlami[4][3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { -1, -1, -1 }}; + T shapes[10]; for (int j = 0; j < 4; j++) { @@ -2672,7 +2681,7 @@ namespace netgen const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET); for (int j = 0; j < 6; j++) { - double wi = edgeweight[info.edgenrs[j]]; + T wi = edgeweight[info.edgenrs[j]]; shapes[j+4] = 2 * wi * lami[edges[j][0]-1] * lami[edges[j][1]-1]; for (int k = 0; k < 3; k++) @@ -2696,6 +2705,8 @@ namespace netgen { case TET: { + dshapes = T(0.0); + dshapes(0,0) = 1; dshapes(1,1) = 1; dshapes(2,2) = 1; @@ -2705,7 +2716,7 @@ namespace netgen if (info.order == 1) return; - double lami[] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; + T lami[] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; int ii = 4; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET); for (int i = 0; i < 6; i++) @@ -2718,7 +2729,7 @@ namespace netgen CalcScaledEdgeShapeDxDt<3> (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0)); - Mat<2,3> trans; + Mat<2,3,T> trans; for (int j = 0; j < 3; j++) { trans(0,j) = dshapes(vi1,j)-dshapes(vi2,j); @@ -2727,8 +2738,8 @@ namespace netgen for (int j = 0; j < order-1; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); dshapes(ii+j,2) = ddx * trans(0,2) + ddt * trans(1,2); @@ -2754,7 +2765,7 @@ namespace netgen lami[fnums[2]], lami[fnums[0]]+lami[fnums[1]]+lami[fnums[2]], &dshapes(ii,0)); - Mat<3,3> trans; + Mat<3,3,T> trans; for (int j = 0; j < 3; j++) { trans(0,j) = dshapes(fnums[1],j)-dshapes(fnums[0],j); @@ -2765,9 +2776,9 @@ namespace netgen int nfd = (forder-1)*(forder-2)/2; for (int j = 0; j < nfd; j++) { - double ddx = dshapes(ii+j,0); - double ddy = dshapes(ii+j,1); - double ddt = dshapes(ii+j,2); + T ddx = dshapes(ii+j,0); + T ddy = dshapes(ii+j,1); + T ddt = dshapes(ii+j,2); dshapes(ii+j,0) = ddx * trans(0,0) + ddy * trans(1,0) + ddt * trans(2,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddy * trans(1,1) + ddt * trans(2,1); dshapes(ii+j,2) = ddx * trans(0,2) + ddy * trans(1,2) + ddt * trans(2,2); @@ -2784,7 +2795,7 @@ namespace netgen { if (dshapes.Height() == 4) { - dshapes = 0.0; + dshapes = T(0.0); dshapes(0,0) = 1; dshapes(1,1) = 1; @@ -2795,11 +2806,11 @@ namespace netgen } else { - AutoDiff<3> x(xi(0), 0); - AutoDiff<3> y(xi(1), 1); - AutoDiff<3> z(xi(2), 2); - AutoDiff<3> lam4 = 1-x-y-z; - AutoDiff<3> shapes[10]; + AutoDiff<3,T> x(xi(0), 0); + AutoDiff<3,T> y(xi(1), 1); + AutoDiff<3,T> z(xi(2), 2); + AutoDiff<3,T> lam4 = 1-x-y-z; + AutoDiff<3,T> shapes[10]; shapes[0] = 2 * x * x - x; shapes[1] = 2 * y * y - y; @@ -2829,10 +2840,10 @@ namespace netgen case PRISM: { - double lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; - double lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; - double dlamiz[6] = { -1, -1, -1, 1, 1, 1 }; - double dlami[6][2] = + T lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; + T lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; + T dlamiz[6] = { -1, -1, -1, 1, 1, 1 }; + T dlami[6][2] = { { 1, 0, }, { 0, 1, }, { -1, -1 }, @@ -2863,11 +2874,12 @@ namespace netgen vi1 = vi1 % 3; vi2 = vi2 % 3; - Vector shapei(order+1); + ArrayMem shapei_mem(order+1); + TFlatVector shapei(order+1, &shapei_mem[0]); CalcScaledEdgeShapeDxDt<3> (order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0) ); CalcScaledEdgeShape(order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapei(0) ); - Mat<2,2> trans; + Mat<2,2,T> trans; for (int j = 0; j < 2; j++) { trans(0,j) = dlami[vi1][j]-dlami[vi2][j]; @@ -2876,16 +2888,16 @@ namespace netgen for (int j = 0; j < order-1; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); } - double facz = (i < 3) ? (1-xi(2)) : xi(2); - double dfacz = (i < 3) ? (-1) : 1; + T facz = (i < 3) ? (1-xi(2)) : xi(2); + T dfacz = (i < 3) ? (-1) : 1; for (int j = 0; j < order-1; j++) { dshapes(ii+j,0) *= facz; @@ -2905,13 +2917,13 @@ namespace netgen int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1); if (el[vi1] > el[vi2]) swap (vi1, vi2); - double bubz = lamiz[vi1] * lamiz[vi2]; - double dbubz = dlamiz[vi1]*lamiz[vi2] + lamiz[vi1]*dlamiz[vi2]; - double polyz = lamiz[vi1] - lamiz[vi2]; - double dpolyz = dlamiz[vi1] - dlamiz[vi2]; - double bubxy = lami[(vi1)%3]; - double dbubxydx = dlami[(vi1)%3][0]; - double dbubxydy = dlami[(vi1)%3][1]; + T bubz = lamiz[vi1] * lamiz[vi2]; + T dbubz = dlamiz[vi1]*lamiz[vi2] + lamiz[vi1]*dlamiz[vi2]; + T polyz = lamiz[vi1] - lamiz[vi2]; + T dpolyz = dlamiz[vi1] - dlamiz[vi2]; + T bubxy = lami[(vi1)%3]; + T dbubxydx = dlami[(vi1)%3][0]; + T dbubxydy = dlami[(vi1)%3][1]; for (int j = 0; j < eorder-1; j++) { @@ -2942,8 +2954,10 @@ namespace netgen if(el[fav[1]] > el[fav[2]]) swap(fav[1],fav[2]); if(el[fav[0]] > el[fav[1]]) swap(fav[0],fav[1]); - MatrixFixWidth<2> dshapei(ndf); - Vector shapei(ndf); + ArrayMem dshapei_mem(ndf); + ArrayMem shapei_mem(ndf); + MatrixFixWidth<2,T> dshapei(ndf, &dshapei_mem[0]); + TFlatVector shapei(ndf, &shapei_mem[0]); CalcTrigShapeDxDy (forder, lami[fav[2]]-lami[fav[1]], lami[fav[0]], @@ -2951,7 +2965,7 @@ namespace netgen CalcTrigShape (forder, lami[fav[2]]-lami[fav[1]], lami[fav[0]], &shapei(0)); - Mat<2,2> trans; + Mat<2,2,T> trans; for (int j = 0; j < 2; j++) { trans(0,j) = dlami[fav[2]][j]-dlami[fav[1]][j]; @@ -2962,8 +2976,8 @@ namespace netgen { // double ddx = dshapes(ii+j,0); // double ddt = dshapes(ii+j,1); - double ddx = dshapei(j,0); - double ddt = dshapei(j,1); + T ddx = dshapei(j,0); + T ddt = dshapei(j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); } @@ -2983,14 +2997,15 @@ namespace netgen case PYRAMID: { - dshapes = 0.0; - double x = xi(0); - double y = xi(1); - double z = xi(2); + dshapes = T(0.0); + T x = xi(0); + T y = xi(1); + T z = xi(2); - if (z == 1.) z = 1-1e-10; - double z1 = 1-z; - double z2 = z1*z1; + // if (z == 1.) z = 1-1e-10; + z *= 1-1e-12; + T z1 = 1-z; + T z2 = z1*z1; dshapes(0,0) = -(z1-y)/z1; dshapes(0,1) = -(z1-x)/z1; @@ -3016,29 +3031,30 @@ namespace netgen int ii = 5; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (PYRAMID); - if (z == 1.) z = 1-1e-10; - double shapes[5]; + // if (z == 1.) z = 1-1e-10; + z *= 1-1e-12; + T shapes[5]; shapes[0] = (1-z-x)*(1-z-y) / (1-z); shapes[1] = x*(1-z-y) / (1-z); shapes[2] = x*y / (1-z); shapes[3] = (1-z-x)*y / (1-z); shapes[4] = z; - double sigma[4] = + T sigma[4] = { ( (1-z-x) + (1-z-y) ), ( x + (1-z-y) ), ( x + y ), ( (1-z-x) + y ), }; - double dsigma[4][3] = + T dsigma[4][3] = { { -1, -1, -2 }, { 1, -1, -1 }, { 1, 1, 0 }, { -1, 1, -1 } }; - double dz[3] = { 0, 0, 1 }; + T dz[3] = { 0, 0, 1 }; for (int i = 0; i < 4; i++) // horizontal edges { int eorder = edgeorder[info.edgenrs[i]]; @@ -3047,11 +3063,12 @@ namespace netgen int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1); if (el[vi1] > el[vi2]) swap (vi1, vi2); - Vector shapei(eorder+1); + ArrayMem shapei_mem(eorder+1); + TFlatVector shapei(eorder+1,&shapei_mem[0]); CalcScaledEdgeShapeDxDt<3> (eorder, sigma[vi1]-sigma[vi2], 1-z, &dshapes(ii,0) ); CalcScaledEdgeShape(eorder, sigma[vi1]-sigma[vi2], 1-z, &shapei(0) ); - double fac = (shapes[vi1]+shapes[vi2]) / (1-z); - double dfac[3]; + T fac = (shapes[vi1]+shapes[vi2]) / (1-z); + T dfac[3]; for (int k = 0; k < 3; k++) dfac[k] = ( (dshapes(vi1,k)+dshapes(vi2,k)) * (1-z) - (shapes[vi1]+shapes[vi2]) *(-dshapes(4,k)) ) @@ -3059,8 +3076,8 @@ namespace netgen for (int j = 0; j < eorder-1; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); for (int k = 0; k < 3; k++) dshapes(ii+j,k) = fac * (ddx * (dsigma[vi1][k]-dsigma[vi2][k]) - ddt*dz[k]) + dfac[k] * shapei(j); @@ -3075,11 +3092,10 @@ namespace netgen case HEX: { - dshapes = 0.0; - - double x = xi(0); - double y = xi(1); - double z = xi(2); + // NgProfiler::StartTimer(timer); + T x = xi(0); + T y = xi(1); + T z = xi(2); // shapes[0] = (1-x)*(1-y)*(1-z); dshapes(0,0) = - (1-y)*(1-z); @@ -3121,10 +3137,11 @@ namespace netgen dshapes(7,1) = (1-x) * z; dshapes(7,2) = (1-x) * y; + // NgProfiler::StopTimer(timer); if (info.order == 1) return; - double shapes[8] = { + T shapes[8] = { (1-x)*(1-y)*(1-z), x *(1-y)*(1-z), x * y *(1-z), @@ -3135,7 +3152,7 @@ namespace netgen (1-x)* y *(z), }; - double mu[8] = { + T mu[8] = { (1-x)+(1-y)+(1-z), x +(1-y)+(1-z), x + y +(1-z), @@ -3146,7 +3163,7 @@ namespace netgen (1-x)+ y +(z) }; - double dmu[8][3] = { + T dmu[8][3] = { { -1, -1, -1 }, { 1, -1, -1 }, { 1, 1, -1 }, @@ -3157,7 +3174,7 @@ namespace netgen { -1, 1, 1 } }; - ArrayMem hshapes(order+1), hdshapes(order+1); + ArrayMem hshapes(order+1), hdshapes(order+1); int ii = 8; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (HEX); @@ -3172,8 +3189,8 @@ namespace netgen CalcEdgeShapeDx (eorder, mu[vi1]-mu[vi2], &hshapes[0], &hdshapes[0]); - double lame = shapes[vi1]+shapes[vi2]; - double dlame[3] = { + T lame = shapes[vi1]+shapes[vi2]; + T dlame[3] = { dshapes(vi1, 0) + dshapes(vi2, 0), dshapes(vi1, 1) + dshapes(vi2, 1), dshapes(vi1, 2) + dshapes(vi2, 2) @@ -3779,36 +3796,43 @@ namespace netgen + template void CurvedElements :: CalcMultiPointElementTransformation (ElementIndex elnr, int n, - const double * xi, size_t sxi, - double * x, size_t sx, - double * dxdxi, size_t sdxdxi) + const T * xi, size_t sxi, + T * x, size_t sx, + T * dxdxi, size_t sdxdxi) { - // static int timer = NgProfiler::CreateTimer ("calcmultipointtrafo, calcshape"); - + static int timer = NgProfiler::CreateTimer ("calcmultipointelementtrafo"); + static int timer1 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 1"); + static int timer2 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 2"); + static int timer3 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 3"); + static int timer4 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 4"); + static int timer5 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 5"); + // NgProfiler::StartTimer (timer); + // NgProfiler::StartTimer (timer1); if (mesh.coarsemesh) { const HPRefElement & hpref_el = (*mesh.hpelements) [mesh[elnr].hp_elnr]; // xi umrechnen - double lami[8]; - FlatVector vlami(8, lami); + T lami[8]; + TFlatVector vlami(8, &lami[0]); - ArrayMem coarse_xi (3*n); + ArrayMem coarse_xi (3*n); for (int pi = 0; pi < n; pi++) { vlami = 0; - Point<3> pxi; + Point<3,T> pxi; for (int j = 0; j < 3; j++) pxi(j) = xi[pi*sxi+j]; - mesh[elnr].GetShapeNew ( pxi, vlami); + mesh[elnr].GetShapeNew (pxi, vlami); - Point<3> cxi(0,0,0); + Point<3,T> cxi(0,0,0); for (int i = 0; i < hpref_el.np; i++) for (int j = 0; j < 3; j++) cxi(j) += hpref_el.param[i][j] * lami[i]; @@ -3823,15 +3847,15 @@ namespace netgen x, sx, dxdxi, sdxdxi); - Mat<3,3> trans, dxdxic; + Mat<3,3,T> trans, dxdxic; if (dxdxi) { - MatrixFixWidth<3> dlami(8); - dlami = 0; + MatrixFixWidth<3,T> dlami(8); + dlami = T(0); for (int pi = 0; pi < n; pi++) { - Point<3> pxi; + Point<3,T> pxi; for (int j = 0; j < 3; j++) pxi(j) = xi[pi*sxi+j]; @@ -3843,7 +3867,7 @@ namespace netgen for (int i = 0; i < hpref_el.np; i++) trans(l,k) += hpref_el.param[i][l] * dlami(i, k); - Mat<3> mat_dxdxic, mat_dxdxi; + Mat<3,3,T> mat_dxdxic, mat_dxdxi; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) mat_dxdxic(j,k) = dxdxi[pi*sdxdxi+3*j+k]; @@ -3863,10 +3887,8 @@ namespace netgen - - - - MatrixFixWidth<3> dshapes; + // NgProfiler::StopTimer (timer1); + // NgProfiler::StartTimer (timer2); const Element & el = mesh[elnr]; @@ -3896,29 +3918,43 @@ namespace netgen // info.ndof += facecoeffsindex[info.facenr+1] - facecoeffsindex[info.facenr]; } + // NgProfiler::StopTimer (timer2); + // NgProfiler::StartTimer (timer3); + + ArrayMem,100> coefs(info.ndof); - ArrayMem shapes_mem(info.ndof); - Vector shapes(info.ndof, &shapes_mem[0]); + ArrayMem shapes_mem(info.ndof); + + TFlatVector shapes(info.ndof, &shapes_mem[0]); + + ArrayMem dshapes_mem(3*info.ndof); + MatrixFixWidth<3,T> dshapes(info.ndof, &dshapes_mem[0]); + + // NgProfiler::StopTimer (timer3); + // NgProfiler::StartTimer (timer4); GetCoefficients (info, &coefs[0]); if (x) { for (int j = 0; j < n; j++) { - Point<3> xij, xj; + Point<3,T> xij, xj; for (int k = 0; k < 3; k++) xij(k) = xi[j*sxi+k]; - CalcElementShapes (info, xij, shapes); - xj = 0; + xj = T(0.0); for (int i = 0; i < coefs.Size(); i++) - xj += shapes(i) * coefs[i]; + for (int k = 0; k < 3; k++) + xj(k) += shapes(i) * coefs[i](k); for (int k = 0; k < 3; k++) x[j*sx+k] = xj(k); } } + // NgProfiler::StopTimer (timer4); + // NgProfiler::StartTimer (timer5); + if (dxdxi) { if (info.order == 1 && type == TET) @@ -3926,13 +3962,13 @@ namespace netgen if (n > 0) { - Point<3> xij; + Point<3,T> xij; for (int k = 0; k < 3; k++) xij(k) = xi[k]; CalcElementDShapes (info, xij, dshapes); - Mat<3> dxdxij; + Mat<3,3,T> dxdxij; dxdxij = 0.0; for (int i = 0; i < coefs.Size(); i++) for (int j = 0; j < 3; j++) @@ -3950,36 +3986,94 @@ namespace netgen { for (int ip = 0; ip < n; ip++) { - Point<3> xij; + Point<3,T> xij; for (int k = 0; k < 3; k++) xij(k) = xi[ip*sxi+k]; - + + // just for testing ... + dshapes = T(0.0); + const Element & el = mesh[info.elnr]; + if (el.GetType() != HEX) + throw NgException ("not a hex"); CalcElementDShapes (info, xij, dshapes); - - Mat<3> dxdxij; + + /* + Mat<3,3,T> dxdxij; dxdxij = 0.0; for (int i = 0; i < coefs.Size(); i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) dxdxij(j,k) += dshapes(i,k) * coefs[i](j); - - + for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) dxdxi[ip*sdxdxi+3*j+k] = dxdxij(j,k); + */ + + T dxdxi00 = T(0.0); + T dxdxi01 = T(0.0); + T dxdxi02 = T(0.0); + T dxdxi10 = T(0.0); + T dxdxi11 = T(0.0); + T dxdxi12 = T(0.0); + T dxdxi20 = T(0.0); + T dxdxi21 = T(0.0); + T dxdxi22 = T(0.0); + + for (int i = 0; i < coefs.Size(); i++) + { + T ds0 = dshapes(i,0); + T ds1 = dshapes(i,1); + T ds2 = dshapes(i,2); + T cf0 = coefs[i](0); + T cf1 = coefs[i](1); + T cf2 = coefs[i](2); + + dxdxi00 += ds0*cf0; + dxdxi01 += ds1*cf0; + dxdxi02 += ds2*cf0; + dxdxi10 += ds0*cf1; + dxdxi11 += ds1*cf1; + dxdxi12 += ds2*cf1; + dxdxi20 += ds0*cf2; + dxdxi21 += ds1*cf2; + dxdxi22 += ds2*cf2; + } + + dxdxi[ip*sdxdxi+3*0+0] = dxdxi00; + dxdxi[ip*sdxdxi+3*0+1] = dxdxi01; + dxdxi[ip*sdxdxi+3*0+2] = dxdxi02; + + dxdxi[ip*sdxdxi+3*1+0] = dxdxi10; + dxdxi[ip*sdxdxi+3*1+1] = dxdxi11; + dxdxi[ip*sdxdxi+3*1+2] = dxdxi12; + + dxdxi[ip*sdxdxi+3*2+0] = dxdxi20; + dxdxi[ip*sdxdxi+3*2+1] = dxdxi21; + dxdxi[ip*sdxdxi+3*2+2] = dxdxi22; } } } + + // NgProfiler::StopTimer (timer5); + // NgProfiler::StopTimer (timer); } + template + void CurvedElements :: + CalcMultiPointElementTransformation + (ElementIndex elnr, int n, + const double * xi, size_t sxi, + double * x, size_t sx, + double * dxdxi, size_t sdxdxi); - - - - - + template + void CurvedElements :: + CalcMultiPointElementTransformation + (ElementIndex elnr, int n, + const SIMD * xi, size_t sxi, + SIMD * x, size_t sx, + SIMD * dxdxi, size_t sdxdxi); }; - - diff --git a/libsrc/meshing/curvedelems.hpp b/libsrc/meshing/curvedelems.hpp index 9704374d..a30c31ae 100644 --- a/libsrc/meshing/curvedelems.hpp +++ b/libsrc/meshing/curvedelems.hpp @@ -141,10 +141,11 @@ public: Array< Point<3> > * x, Array< Mat<3,3> > * dxdxi); + template void CalcMultiPointElementTransformation (ElementIndex elnr, int n, - const double * xi, size_t sxi, - double * x, size_t sx, - double * dxdxi, size_t sdxdxi); + const T * xi, size_t sxi, + T * x, size_t sx, + T * dxdxi, size_t sdxdxi); @@ -196,10 +197,11 @@ private: Vec<3> hcoefs[10]; // enough for second order tets }; - - void CalcElementShapes (ElementInfo & info, const Point<3> & xi, Vector & shapes) const; + template + void CalcElementShapes (ElementInfo & info, Point<3,T> xi, TFlatVector shapes) const; void GetCoefficients (ElementInfo & info, Vec<3> * coefs) const; - void CalcElementDShapes (ElementInfo & info, const Point<3> & xi, MatrixFixWidth<3> & dshapes) const; + template + void CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const; class SurfaceElementInfo diff --git a/libsrc/meshing/meshtype.cpp b/libsrc/meshing/meshtype.cpp index b6eb599f..0b0720a8 100644 --- a/libsrc/meshing/meshtype.cpp +++ b/libsrc/meshing/meshtype.cpp @@ -1965,9 +1965,9 @@ namespace netgen } } - + template void Element :: - GetDShapeNew (const Point<3> & p, MatrixFixWidth<3> & dshape) const + GetDShapeNew (const Point<3,T> & p, MatrixFixWidth<3,T> & dshape) const { switch (typ) { diff --git a/libsrc/meshing/meshtype.hpp b/libsrc/meshing/meshtype.hpp index 37b08460..19056e21 100644 --- a/libsrc/meshing/meshtype.hpp +++ b/libsrc/meshing/meshtype.hpp @@ -125,7 +125,6 @@ namespace netgen PointIndex operator-- (int) { PointIndex hi(*this); i--; return hi; } PointIndex operator++ () { i++; return *this; } PointIndex operator-- () { i--; return *this; } - #ifdef BASE0 enum { BASE = 0 }; #else @@ -718,9 +717,12 @@ namespace netgen void GetShape (const Point<3> & p, class Vector & shape) const; void GetShapeNew (const Point<3> & p, class FlatVector & shape) const; + template + void GetShapeNew (const Point<3,T> & p, TFlatVector shape) const; /// matrix 2 * np void GetDShape (const Point<3> & p, class DenseMatrix & dshape) const; - void GetDShapeNew (const Point<3> & p, class MatrixFixWidth<3> & dshape) const; + template + void GetDShapeNew (const Point<3,T> & p, class MatrixFixWidth<3,T> & dshape) const; /// matrix 3 * np void GetPointMatrix (const T_POINTS & points, class DenseMatrix & pmat) const;