From 8414cb2d60f0c9388edf04519be0636e8f9ab019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Wed, 6 Jul 2016 17:12:57 +0100 Subject: [PATCH 01/10] using simds for element-trafo, added type-template to many classes --- libsrc/general/array.hpp | 4 +- libsrc/general/myadt.hpp | 3 + libsrc/general/mysimd.hpp | 358 ++++++++++++++++++++++ libsrc/gprim/geomobjects.hpp | 80 ++--- libsrc/gprim/geomops.hpp | 8 +- libsrc/interface/nginterface_v2.cpp | 8 + libsrc/linalg/densemat.hpp | 116 +++++++- libsrc/linalg/vector.hpp | 41 +++ libsrc/meshing/curvedelems.cpp | 444 +++++++++++++++++----------- libsrc/meshing/curvedelems.hpp | 14 +- libsrc/meshing/meshtype.cpp | 4 +- libsrc/meshing/meshtype.hpp | 6 +- 12 files changed, 854 insertions(+), 232 deletions(-) create mode 100644 libsrc/general/mysimd.hpp diff --git a/libsrc/general/array.hpp b/libsrc/general/array.hpp index 8cf9ee19..1429f2c8 100644 --- a/libsrc/general/array.hpp +++ b/libsrc/general/array.hpp @@ -416,9 +416,9 @@ namespace netgen using Array::data; using Array::ownmem; - // T mem[S]; // Intel C++ calls dummy constructor + T mem[S]; // Intel C++ calls dummy constructor // char mem[S*sizeof(T)]; - double mem[(S*sizeof(T)+7) / 8]; + // double mem[(S*sizeof(T)+7) / 8]; public: /// Generate array of logical and physical size asize explicit ArrayMem(int asize = 0) diff --git a/libsrc/general/myadt.hpp b/libsrc/general/myadt.hpp index c5b9c5ec..8fa8b95d 100644 --- a/libsrc/general/myadt.hpp +++ b/libsrc/general/myadt.hpp @@ -46,4 +46,7 @@ #include "gzstream.h" #include "archive_base.hpp" +#include "mysimd.hpp" + + #endif diff --git a/libsrc/general/mysimd.hpp b/libsrc/general/mysimd.hpp new file mode 100644 index 00000000..ed67dbec --- /dev/null +++ b/libsrc/general/mysimd.hpp @@ -0,0 +1,358 @@ +#ifndef FILE_MYSIMD +#define FILE_MYSIMD + +/**************************************************************************/ +/* File: mysimd.hpp */ +/* Author: Joachim Schoeberl */ +/* Date: 25. Mar. 16 */ +/**************************************************************************/ + +#include + +#ifdef WIN32 +inline __m128d operator- (__m128d a) { return _mm_xor_pd(a, _mm_set1_pd(-0.0)); } +inline __m128d operator+ (__m128d a, __m128d b) { return _mm_add_pd(a,b); } +inline __m128d operator- (__m128d a, __m128d b) { return _mm_sub_pd(a,b); } +inline __m128d operator* (__m128d a, __m128d b) { return _mm_mul_pd(a,b); } +inline __m128d operator/ (__m128d a, __m128d b) { return _mm_div_pd(a,b); } +inline __m128d operator* (double a, __m128d b) { return _mm_set1_pd(a)*b; } +inline __m128d operator* (__m128d b, double a) { return _mm_set1_pd(a)*b; } + +inline __m128d operator+= (__m128d &a, __m128d b) { return a = a+b; } +inline __m128d operator-= (__m128d &a, __m128d b) { return a = a-b; } +inline __m128d operator*= (__m128d &a, __m128d b) { return a = a*b; } +inline __m128d operator/= (__m128d &a, __m128d b) { return a = a/b; } + +inline __m256d operator- (__m256d a) { return _mm256_xor_pd(a, _mm256_set1_pd(-0.0)); } +inline __m256d operator+ (__m256d a, __m256d b) { return _mm256_add_pd(a,b); } +inline __m256d operator- (__m256d a, __m256d b) { return _mm256_sub_pd(a,b); } +inline __m256d operator* (__m256d a, __m256d b) { return _mm256_mul_pd(a,b); } +inline __m256d operator/ (__m256d a, __m256d b) { return _mm256_div_pd(a,b); } +inline __m256d operator* (double a, __m256d b) { return _mm256_set1_pd(a)*b; } +inline __m256d operator* (__m256d b, double a) { return _mm256_set1_pd(a)*b; } + +inline __m256d operator+= (__m256d &a, __m256d b) { return a = a+b; } +inline __m256d operator-= (__m256d &a, __m256d b) { return a = a-b; } +inline __m256d operator*= (__m256d &a, __m256d b) { return a = a*b; } +inline __m256d operator/= (__m256d &a, __m256d b) { return a = a/b; } +#endif + + + +namespace netgen +{ + template class SIMD; + + template + struct has_call_operator + { + template static std::true_type check( decltype( sizeof(&C::operator() )) ) { return std::true_type(); } + template static std::false_type check(...) { return std::false_type(); } + typedef decltype( check(sizeof(char)) ) type; + static constexpr type value = type(); + }; + +#ifdef __AVX2__ + + template<> + class alignas(32) SIMD + { + __m256d data; + + public: + static constexpr int Size() { return 4; } + SIMD () = default; + SIMD (const SIMD &) = default; + SIMD & operator= (const SIMD &) = default; + + SIMD (double val) + { + data = _mm256_set1_pd(val); + } + + SIMD (__m256d adata) + : data(adata) + { ; } + + /* + template + SIMD (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + } + */ + + /* + template + SIMD & operator= (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + return *this; + } + */ + + + template + void SIMD_function (const Function & func, std::true_type) + { + data = _mm256_set_pd(func(3), func(2), func(1), func(0)); + } + + // not a function + void SIMD_function (double const * p, std::false_type) + { + data = _mm256_loadu_pd(p); + } + + void SIMD_function (double val, std::false_type) + { + data = _mm256_set1_pd(val); + } + + void SIMD_function (__m256d _data, std::false_type) + { + data = _data; + } + + inline double operator[] (int i) const { return ((double*)(&data))[i]; } + inline __m256d Data() const { return data; } + inline __m256d & Data() { return data; } + }; + + + inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } + inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } + inline SIMD operator- (SIMD a) { return -a.Data(); } + inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } + inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } + inline SIMD operator* (double a, SIMD b) { return SIMD(a)*b; } + inline SIMD operator* (SIMD b, double a) { return SIMD(a)*b; } + inline SIMD operator+= (SIMD & a, SIMD b) { return a.Data()+=b.Data(); } + inline SIMD operator-= (SIMD & a, SIMD b) { return a.Data()-=b.Data(); } + inline SIMD operator*= (SIMD & a, SIMD b) { return a.Data()*=b.Data(); } + inline SIMD operator/= (SIMD & a, SIMD b) { return a.Data()/=b.Data(); } + + using std::sqrt; + using std::fabs; + + inline SIMD sqrt (SIMD a) { return _mm256_sqrt_pd(a.Data()); } + inline SIMD fabs (SIMD a) { return _mm256_max_pd(a.Data(), -a.Data()); } + inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } + inline SIMD Trans (SIMD a) { return a; } + inline SIMD IfPos (SIMD a, SIMD b, SIMD c) + { + auto cp = _mm256_cmp_pd (a.Data(), _mm256_setzero_pd(), _CMP_GT_OS); + return _mm256_blendv_pd(c.Data(), b.Data(), cp); + } + + inline double HSum (SIMD sd) + { + __m128d hv = _mm_add_pd (_mm256_extractf128_pd(sd.Data(),0), _mm256_extractf128_pd(sd.Data(),1)); + return _mm_cvtsd_f64 (_mm_hadd_pd (hv, hv)); + } + + + + +#else + + template<> + class SIMD + { + double data; + + public: + static constexpr int Size() { return 1; } + SIMD () = default; + SIMD (const SIMD &) = default; + SIMD & operator= (const SIMD &) = default; + + template + SIMD (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + } + + template + SIMD & operator= (const T & val) + { +// SIMD_function(val, std::is_convertible>()); + SIMD_function(val, has_call_operator::value); + return *this; + } + + template + void SIMD_function (const Function & func, std::true_type) + { + data = func(0); + } + + // not a function + void SIMD_function (double const * p, std::false_type) + { + data = *p; + } + + void SIMD_function (double val, std::false_type) + { + data = val; + } + + double operator[] (int i) const { return ((double*)(&data))[i]; } + double Data() const { return data; } + double & Data() { return data; } + }; + + + inline SIMD operator+ (SIMD a, SIMD b) { return a.Data()+b.Data(); } + inline SIMD operator- (SIMD a, SIMD b) { return a.Data()-b.Data(); } + inline SIMD operator- (SIMD a) { return -a.Data(); } + inline SIMD operator* (SIMD a, SIMD b) { return a.Data()*b.Data(); } + inline SIMD operator/ (SIMD a, SIMD b) { return a.Data()/b.Data(); } + inline SIMD operator* (double a, SIMD b) { return SIMD(a)*b; } + inline SIMD operator* (SIMD b, double a) { return SIMD(a)*b; } + inline SIMD operator+= (SIMD & a, SIMD b) { return a.Data()+=b.Data(); } + inline SIMD operator-= (SIMD & a, SIMD b) { return a.Data()-=b.Data(); } + inline SIMD operator*= (SIMD & a, SIMD b) { return a.Data()*=b.Data(); } + inline SIMD operator/= (SIMD & a, SIMD b) { return a.Data()/=b.Data(); } + + inline SIMD sqrt (SIMD a) { return std::sqrt(a.Data()); } + inline SIMD fabs (SIMD a) { return std::fabs(a.Data()); } + inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } + inline SIMD Trans (SIMD a) { return a; } + inline SIMD IfPos (SIMD a, SIMD b, SIMD c) + { + return (a.Data() > 0) ? b : c; + } + + inline double HSum (SIMD sd) + { return sd.Data(); } + +#endif + + + + + + + + template + ostream & operator<< (ostream & ost, SIMD simd) + { + ost << simd[0]; + for (int i = 1; i < simd.Size(); i++) + ost << " " << simd[i]; + return ost; + } + + /* + using std::exp; + inline netgen::SIMD exp (netgen::SIMD a) { + return netgen::SIMD([&](int i)->double { return exp(a[i]); } ); +} + + using std::log; +inline netgen::SIMD log (netgen::SIMD a) { + return netgen::SIMD([&](int i)->double { return log(a[i]); } ); +} + + using std::pow; +inline netgen::SIMD pow (netgen::SIMD a, double x) { + return netgen::SIMD([&](int i)->double { return pow(a[i],x); } ); +} + */ + + + template + class MultiSIMD + { + SIMD head; + MultiSIMD tail; + public: + MultiSIMD () = default; + MultiSIMD (const MultiSIMD & ) = default; + MultiSIMD (T v) : head(v), tail(v) { ; } + MultiSIMD (SIMD _head, MultiSIMD _tail) + : head(_head), tail(_tail) { ; } + SIMD Head() const { return head; } + MultiSIMD Tail() const { return tail; } + SIMD & Head() { return head; } + MultiSIMD & Tail() { return tail; } + + template + SIMD Get() const { return NR==0 ? head : tail.template Get(); } + template + SIMD & Get() { return NR==0 ? head : tail.template Get(); } + }; + + template + class MultiSIMD<2,T> + { + SIMD v0, v1; + public: + MultiSIMD () = default; + MultiSIMD (const MultiSIMD & ) = default; + MultiSIMD (T v) : v0(v), v1(v) { ; } + MultiSIMD (SIMD _v0, SIMD _v1) : v0(_v0), v1(_v1) { ; } + + SIMD Head() const { return v0; } + SIMD Tail() const { return v1; } + SIMD & Head() { return v0; } + SIMD & Tail() { return v1; } + + template + SIMD Get() const { return NR==0 ? v0 : v1; } + template + SIMD & Get() { return NR==0 ? v0 : v1; } + }; + + template inline MultiSIMD operator+ (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()+b.Head(), a.Tail()+b.Tail()); } + template inline MultiSIMD operator+ (double a, MultiSIMD b) + { return MultiSIMD (a+b.Head(), a+b.Tail()); } + template inline MultiSIMD operator+ (MultiSIMD b, double a) + { return MultiSIMD (a+b.Head(), a+b.Tail()); } + + template inline MultiSIMD operator- (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()-b.Head(), a.Tail()-b.Tail()); } + template inline MultiSIMD operator- (double a, MultiSIMD b) + { return MultiSIMD (a-b.Head(), a-b.Tail()); } + template inline MultiSIMD operator- (MultiSIMD b, double a) + { return MultiSIMD (b.Head()-a, b.Tail()-a); } + template inline MultiSIMD operator- (MultiSIMD a) + { return MultiSIMD (-a.Head(), -a.Tail()); } + template inline MultiSIMD operator* (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()*b.Head(), a.Tail()*b.Tail()); } + template inline MultiSIMD operator/ (MultiSIMD a, MultiSIMD b) + { return MultiSIMD (a.Head()/b.Head(), a.Tail()/b.Tail()); } + template inline MultiSIMD operator* (double a, MultiSIMD b) + { return MultiSIMD ( a*b.Head(), a*b.Tail()); } + template inline MultiSIMD operator* (MultiSIMD b, double a) + { return MultiSIMD ( a*b.Head(), a*b.Tail()); } + + template inline MultiSIMD & operator+= (MultiSIMD & a, MultiSIMD b) + { a.Head()+=b.Head(); a.Tail()+=b.Tail(); return a; } + template inline MultiSIMD operator-= (MultiSIMD & a, double b) + { a.Head()-=b; a.Tail()-=b; return a; } + template inline MultiSIMD operator-= (MultiSIMD & a, MultiSIMD b) + { a.Head()-=b.Head(); a.Tail()-=b.Tail(); return a; } + template inline MultiSIMD & operator*= (MultiSIMD & a, MultiSIMD b) + { a.Head()*=b.Head(); a.Tail()*=b.Tail(); return a; } + template inline MultiSIMD & operator*= (MultiSIMD & a, double b) + { a.Head()*=b; a.Tail()*=b; return a; } + // inline MultiSIMD operator/= (MultiSIMD & a, MultiSIMD b) { return a.Data()/=b.Data(); } + + + template + ostream & operator<< (ostream & ost, MultiSIMD multi) + { + ost << multi.Head() << " " << multi.Tail(); + return ost; + } + +} + +#endif diff --git a/libsrc/gprim/geomobjects.hpp b/libsrc/gprim/geomobjects.hpp index ea80f7ff..d4ca640f 100644 --- a/libsrc/gprim/geomobjects.hpp +++ b/libsrc/gprim/geomobjects.hpp @@ -12,31 +12,31 @@ namespace netgen { - template class Vec; - template class Point; + template class Vec; + template class Point; - template + template class Point { protected: - double x[D]; + T x[D]; public: Point () { ; } - Point (double ax) { for (int i = 0; i < D; i++) x[i] = ax; } - Point (double ax, double ay) + Point (T ax) { for (int i = 0; i < D; i++) x[i] = ax; } + Point (T ax, T ay) { // static_assert(D==2, "Point constructor with 2 args called"); x[0] = ax; x[1] = ay; } - Point (double ax, double ay, double az) + Point (T ax, T ay, T az) { // static_assert(D==3, "Point constructor with 3 args called"); x[0] = ax; x[1] = ay; x[2] = az; } - Point (double ax, double ay, double az, double au) + Point (T ax, T ay, T az, T au) { x[0] = ax; x[1] = ay; x[2] = az; x[3] = au;} Point (const Point & p2) @@ -52,39 +52,39 @@ namespace netgen return *this; } - Point & operator= (double val) + Point & operator= (T val) { for (int i = 0; i < D; i++) x[i] = val; return *this; } - double & operator() (int i) { return x[i]; } - const double & operator() (int i) const { return x[i]; } + T & operator() (int i) { return x[i]; } + const T & operator() (int i) const { return x[i]; } - operator const double* () const { return x; } + operator const T* () const { return x; } }; - template + template class Vec { protected: - double x[D]; + T x[D]; public: Vec () { ; } // for (int i = 0; i < D; i++) x[i] = 0; } - Vec (double ax) { for (int i = 0; i < D; i++) x[i] = ax; } - Vec (double ax, double ay) + Vec (T ax) { for (int i = 0; i < D; i++) x[i] = ax; } + Vec (T ax, T ay) { // static_assert(D==2, "Vec constructor with 2 args called"); x[0] = ax; x[1] = ay; } - Vec (double ax, double ay, double az) + Vec (T ax, T ay, T az) { // static_assert(D==3, "Vec constructor with 3 args called"); x[0] = ax; x[1] = ay; x[2] = az; } - Vec (double ax, double ay, double az, double au) + Vec (T ax, T ay, T az, T au) { x[0] = ax; x[1] = ay; x[2] = az; x[3] = au; } Vec (const Vec & p2) @@ -104,28 +104,28 @@ namespace netgen return *this; } - Vec & operator= (double s) + Vec & operator= (T s) { for (int i = 0; i < D; i++) x[i] = s; return *this; } - double & operator() (int i) { return x[i]; } - const double & operator() (int i) const { return x[i]; } + T & operator() (int i) { return x[i]; } + const T & operator() (int i) const { return x[i]; } - operator const double* () const { return x; } + operator const T* () const { return x; } - double Length () const + T Length () const { - double l = 0; + T l = 0; for (int i = 0; i < D; i++) l += x[i] * x[i]; return sqrt (l); } - double Length2 () const + T Length2 () const { - double l = 0; + T l = 0; for (int i = 0; i < D; i++) l += x[i] * x[i]; return l; @@ -133,7 +133,7 @@ namespace netgen const Vec & Normalize () { - double l = Length(); + T l = Length(); if (l != 0) for (int i = 0; i < D; i++) x[i] /= l; @@ -147,19 +147,19 @@ namespace netgen - template + template class Mat { protected: - double x[H*W]; + T x[H*W]; public: Mat () { ; } Mat (const Mat & b) { for (int i = 0; i < H*W; i++) x[i] = b.x[i]; } - Mat & operator= (double s) + Mat & operator= (T s) { for (int i = 0; i < H*W; i++) x[i] = s; return *this; @@ -171,30 +171,30 @@ namespace netgen return *this; } - double & operator() (int i, int j) { return x[i*W+j]; } - const double & operator() (int i, int j) const { return x[i*W+j]; } - double & operator() (int i) { return x[i]; } - const double & operator() (int i) const { return x[i]; } + T & operator() (int i, int j) { return x[i*W+j]; } + const T & operator() (int i, int j) const { return x[i*W+j]; } + T & operator() (int i) { return x[i]; } + const T & operator() (int i) const { return x[i]; } - Vec Col (int i) const + Vec Col (int i) const { - Vec hv; + Vec hv; for (int j = 0; j < H; j++) hv(j) = x[j*W+i]; return hv; } - Vec Row (int i) const + Vec Row (int i) const { - Vec hv; + Vec hv; for (int j = 0; j < W; j++) hv(j) = x[i*W+j]; return hv; } - void Solve (const Vec & rhs, Vec & sol) const + void Solve (const Vec & rhs, Vec & sol) const { - Mat inv; + Mat inv; CalcInverse (*this, inv); sol = inv * rhs; } diff --git a/libsrc/gprim/geomops.hpp b/libsrc/gprim/geomops.hpp index 90c3cb6b..45614c6b 100644 --- a/libsrc/gprim/geomops.hpp +++ b/libsrc/gprim/geomops.hpp @@ -306,14 +306,14 @@ namespace netgen return m; } - - inline Mat<3,3> operator* (const Mat<3,3> & a, const Mat<3,3> & b) + template + inline Mat<3,3,T> operator* (const Mat<3,3,T> & a, const Mat<3,3,T> & b) { - Mat<3,3> m; + Mat<3,3,T> m; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) { - double sum = 0; + T sum = T(0); for (int k = 0; k < 3; k++) sum += a(i,k) * b(k, j); m(i,j) = sum; diff --git a/libsrc/interface/nginterface_v2.cpp b/libsrc/interface/nginterface_v2.cpp index b39698ef..56e34ce2 100644 --- a/libsrc/interface/nginterface_v2.cpp +++ b/libsrc/interface/nginterface_v2.cpp @@ -645,6 +645,13 @@ namespace netgen __m256d * x, size_t sx, __m256d * dxdxi, size_t sdxdxi) const { + mesh->GetCurvedElements().CalcMultiPointElementTransformation + (elnr, npts, + reinterpret_cast*> (xi), sxi, + reinterpret_cast*> (x), sx, + reinterpret_cast*> (dxdxi), sdxdxi); + + /* for (int i = 0; i < npts; i++) { double hxi[4][3]; @@ -665,6 +672,7 @@ namespace netgen x += sx; dxdxi += sdxdxi; } + */ } template<> DLL_HEADER void Ngx_Mesh :: diff --git a/libsrc/linalg/densemat.hpp b/libsrc/linalg/densemat.hpp index 28ca59ba..9b007202 100644 --- a/libsrc/linalg/densemat.hpp +++ b/libsrc/linalg/densemat.hpp @@ -152,9 +152,120 @@ extern ostream & operator<< (ostream & ost, const DenseMatrix & m); -template +template class MatrixFixWidth { +protected: + int height; + T * __restrict data; + bool ownmem; +public: + /// + MatrixFixWidth () + { height = 0; data = 0; ownmem = false; } + /// + MatrixFixWidth (int h) + { height = h; data = new T[WIDTH*height]; ownmem = true; } + /// + MatrixFixWidth (int h, T * adata) + { height = h; data = adata; ownmem = false; } + /// + MatrixFixWidth (const MatrixFixWidth & m2) + : height(m2.height), data(m2.data), ownmem(false) + { ; } + /// + ~MatrixFixWidth () + { if (ownmem) delete [] data; } + + void SetSize (int h) + { + if (h != height) + { + if (ownmem) delete data; + height = h; + data = new T[WIDTH*height]; + ownmem = true; + } + } + + /// + int Height() const { return height; } + + /// + int Width() const { return WIDTH; } + MatrixFixWidth & operator= (const MatrixFixWidth & m2) + { + for (int i = 0; i < height*WIDTH; i++) + data[i] = m2.data[i]; + } + /// + MatrixFixWidth & operator= (T v) + { + for (int i = 0; i < height*WIDTH; i++) + data[i] = v; + return *this; + } + + /* + /// + void Mult (const FlatVector & v, FlatVector & prod) const + { + T sum; + const T * mp, * sp; + T * dp; + + mp = data; + dp = &prod[0]; + for (int i = 0; i < height; i++) + { + sum = 0; + sp = &v[0]; + + for (int j = 0; j < WIDTH; j++) + { + sum += *mp * *sp; + mp++; + sp++; + } + + *dp = sum; + dp++; + } + } + */ + + T & operator() (int i, int j) + { return data[i*WIDTH+j]; } + + const T & operator() (int i, int j) const + { return data[i*WIDTH+j]; } + + + MatrixFixWidth & operator*= (T v) + { + if (data) + for (int i = 0; i < height*WIDTH; i++) + data[i] *= v; + return *this; + } + + + + const T & Get(int i, int j) const { return data[(i-1)*WIDTH+j-1]; } + /// + const T & Get(int i) const { return data[i-1]; } + /// + void Set(int i, int j, T v) { data[(i-1)*WIDTH+j-1] = v; } + /// + T & Elem(int i, int j) { return data[(i-1)*WIDTH+j-1]; } + /// + const T & ConstElem(int i, int j) const { return data[(i-1)*WIDTH+j-1]; } +}; + + +template +class MatrixFixWidth +{ protected: int height; double * data; @@ -261,6 +372,9 @@ public: }; + + + template extern ostream & operator<< (ostream & ost, const MatrixFixWidth & m) { diff --git a/libsrc/linalg/vector.hpp b/libsrc/linalg/vector.hpp index e7c52e81..1b103d0e 100644 --- a/libsrc/linalg/vector.hpp +++ b/libsrc/linalg/vector.hpp @@ -79,6 +79,47 @@ public: }; +template +class TFlatVector +{ +protected: + int s; + T * data; +public: + TFlatVector () { ; } + TFlatVector (int as, T * adata) + { s = as; data = adata; } + + int Size () const + { return s; } + + TFlatVector & operator= (const TFlatVector & v) + { memcpy (data, v.data, s*sizeof(T)); return *this; } + + TFlatVector & operator= (T scal) + { + for (int i = 0; i < s; i++) data[i] = scal; + return *this; + } + + T & operator[] (int i) { return data[i]; } + const T & operator[] (int i) const { return data[i]; } + T & operator() (int i) { return data[i]; } + const T & operator() (int i) const { return data[i]; } + + // double & Elem (int i) { return data[i-1]; } + // const double & Get (int i) const { return data[i-1]; } + // void Set (int i, double val) { data[i-1] = val; } + + TFlatVector & operator*= (T scal) + { + for (int i = 0; i < s; i++) data[i] *= scal; + return *this; + } +}; + + + class Vector : public FlatVector { diff --git a/libsrc/meshing/curvedelems.cpp b/libsrc/meshing/curvedelems.cpp index f1673bb8..f5db4038 100644 --- a/libsrc/meshing/curvedelems.cpp +++ b/libsrc/meshing/curvedelems.cpp @@ -48,9 +48,10 @@ namespace netgen // compute edge bubbles up to order n, x \in (-1, 1) - static void CalcEdgeShape (int n, double x, double * shape) + template + static void CalcEdgeShape (int n, T x, T * shape) { - double p1 = x, p2 = -1, p3 = 0; + T p1 = x, p2 = -1, p3 = 0; for (int j=2; j<=n; j++) { p3=p2; p2=p1; @@ -76,10 +77,11 @@ namespace netgen } } - static void CalcEdgeShapeDx (int n, double x, double * shape, double * dshape) + template + static void CalcEdgeShapeDx (int n, T x, T * shape, T * dshape) { - double p1 = x, p2 = -1, p3 = 0; - double p1dx = 1, p2dx = 0, p3dx = 0; + T p1 = x, p2 = -1, p3 = 0; + T p1dx = 1, p2dx = 0, p3dx = 0; for (int j=2; j<=n; j++) { @@ -95,7 +97,8 @@ namespace netgen } // compute L_i(x/t) * t^i - static void CalcScaledEdgeShape (int n, double x, double t, double * shape) + template + static void CalcScaledEdgeShape (int n, T x, T t, T * shape) { static bool init = false; static double coefs[100][2]; @@ -109,8 +112,8 @@ namespace netgen init = true; } - double p1 = x, p2 = -1, p3 = 0; - double tt = t*t; + T p1 = x, p2 = -1, p3 = 0; + T tt = t*t; for (int j=0; j<=n-2; j++) { p3=p2; p2=p1; @@ -120,13 +123,13 @@ namespace netgen } } - template - static void CalcScaledEdgeShapeDxDt (int n, double x, double t, double * dshape) + template + static void CalcScaledEdgeShapeDxDt (int n, T x, T t, T * dshape) { - double p1 = x, p2 = -1, p3 = 0; - double p1dx = 1, p1dt = 0; - double p2dx = 0, p2dt = 0; - double p3dx = 0, p3dt = 0; + T p1 = x, p2 = -1, p3 = 0; + T p1dx = 1, p1dt = 0; + T p2dx = 0, p2dt = 0; + T p3dx = 0, p3dt = 0; for (int j=0; j<=n-2; j++) { @@ -225,7 +228,7 @@ namespace netgen template void Evaluate (int n, S x, T * values) { - S p1 = 1.0, p2 = 0.0, p3; + S p1(1.0), p2(0.0), p3; if (n >= 0) p2 = values[0] = 1.0; @@ -243,7 +246,7 @@ namespace netgen template void EvaluateScaled (int n, S x, S y, T * values) { - S p1 = 1.0, p2 = 0.0, p3; + S p1(1.0), p2(0.0), p3; if (n >= 0) p2 = values[0] = 1.0; @@ -314,7 +317,7 @@ namespace netgen if (n >= 0) values[0] = 1.0; */ - S p1 = 1.0, p2 = 0.0, p3; + S p1(1.0), p2(0.0), p3; if (n >= 0) p2 = values[0] = 1.0; @@ -378,14 +381,14 @@ namespace netgen shape[ii++] = hx[ix]*hy[iy+50*ix]; } - - static void CalcTrigShapeDxDy (int n, double x, double y, double * dshape) + template + static void CalcTrigShapeDxDy (int n, T x, T y, T * dshape) { if (n < 3) return; - AutoDiff<2> adx(x, 0); - AutoDiff<2> ady(y, 1); - AutoDiff<2> res[2000]; + AutoDiff<2,T> adx(x, 0); + AutoDiff<2,T> ady(y, 1); + AutoDiff<2,T> res[2000]; CalcTrigShape (n, adx, ady, &res[0]); int ndof = (n-1)*(n-2)/2; for (int i = 0; i < ndof; i++) @@ -421,13 +424,14 @@ namespace netgen // compute face bubbles up to order n, 0 < y, y-x < 1, x+y < 1 - static void CalcScaledTrigShapeDxDyDt (int n, double x, double y, double t, double * dshape) + template + static void CalcScaledTrigShapeDxDyDt (int n, T x, T y, T t, T * dshape) { if (n < 3) return; - AutoDiff<3> adx(x, 0); - AutoDiff<3> ady(y, 1); - AutoDiff<3> adt(t, 2); - AutoDiff<3> res[2000]; + AutoDiff<3,T> adx(x, 0); + AutoDiff<3,T> ady(y, 1); + AutoDiff<3,T> adt(t, 2); + AutoDiff<3,T> res[2000]; CalcScaledTrigShape (n, adx, ady, adt, &res[0]); int ndof = (n-1)*(n-2)/2; for (int i = 0; i < ndof; i++) @@ -1390,7 +1394,6 @@ namespace netgen - void CurvedElements :: CalcElementShapes (SegmentInfo & info, double xi, Vector & shapes) const { @@ -2281,9 +2284,6 @@ namespace netgen } - Vector shapes; - MatrixFixWidth<3> dshapes; - const Element & el = mesh[elnr]; ELEMENT_TYPE type = el.GetType(); @@ -2315,6 +2315,11 @@ namespace netgen } } + ArrayMem mem(info.ndof); + TFlatVector shapes(info.ndof, &mem[0]); + ArrayMem dshapes_mem(info.ndof*3); + MatrixFixWidth<3> dshapes(info.ndof, &dshapes_mem[0]); + CalcElementShapes (info, xi, shapes); Vec<3> * coefs = (info.ndof <= 10) ? @@ -2359,16 +2364,16 @@ namespace netgen - - void CurvedElements :: CalcElementShapes (ElementInfo & info, const Point<3> & xi, Vector & shapes) const + template + void CurvedElements :: CalcElementShapes (ElementInfo & info, Point<3,T> xi, TFlatVector shapes) const { const Element & el = mesh[info.elnr]; if (rational && info.order >= 2) { - shapes.SetSize(10); - double w = 1; - double lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; + // shapes.SetSize(10); + T w = 1; + T lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; for (int j = 0; j < 4; j++) shapes(j) = lami[j] * lami[j]; @@ -2384,7 +2389,7 @@ namespace netgen return; } - shapes.SetSize(info.ndof); + // shapes.SetSize(info.ndof); switch (el.GetType()) { @@ -2434,10 +2439,10 @@ namespace netgen case TET10: { - double x = xi(0); - double y = xi(1); - double z = xi(2); - double lam4 = 1 - x - y - z; + T x = xi(0); + T y = xi(1); + T z = xi(2); + T lam4 = 1 - x - y - z; /* shapes(0) = xi(0); shapes(1) = xi(1); @@ -2462,8 +2467,8 @@ namespace netgen case PRISM: { - double lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; - double lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; + T lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; + T lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; for (int i = 0; i < 6; i++) shapes(i) = lami[i] * lamiz[i]; for (int i = 6; i < info.ndof; i++) @@ -2483,7 +2488,7 @@ namespace netgen if (el[vi1] > el[vi2]) swap (vi1, vi2); CalcScaledEdgeShape (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapes(ii)); - double facz = (i < 3) ? (1-xi(2)) : xi(2); + T facz = (i < 3) ? (1-xi(2)) : xi(2); for (int j = 0; j < eorder-1; j++) shapes(ii+j) *= facz; @@ -2499,9 +2504,9 @@ namespace netgen int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1; if (el[vi1] > el[vi2]) swap (vi1, vi2); - double bubz = lamiz[vi1]*lamiz[vi2]; - double polyz = lamiz[vi1] - lamiz[vi2]; - double bubxy = lami[vi1]; + T bubz = lamiz[vi1]*lamiz[vi2]; + T polyz = lamiz[vi1] - lamiz[vi2]; + T bubxy = lami[vi1]; for (int j = 0; j < eorder-1; j++) { @@ -2538,11 +2543,12 @@ namespace netgen case PYRAMID: { shapes = 0.0; - double x = xi(0); - double y = xi(1); - double z = xi(2); + T x = xi(0); + T y = xi(1); + T z = xi(2); - if (z == 1.) z = 1-1e-10; + // if (z == 1.) z = 1-1e-10; + z *= (1-1e-12); shapes[0] = (1-z-x)*(1-z-y) / (1-z); shapes[1] = x*(1-z-y) / (1-z); shapes[2] = x*y / (1-z); @@ -2551,7 +2557,7 @@ namespace netgen if (info.order == 1) return; - double sigma[4] = + T sigma[4] = { sigma[0] = ( (1-z-x) + (1-z-y) ), sigma[1] = ( x + (1-z-y) ), @@ -2570,7 +2576,7 @@ namespace netgen if (el[vi1] > el[vi2]) swap (vi1, vi2); CalcScaledEdgeShape (eorder, sigma[vi1]-sigma[vi2], 1-z, &shapes(ii)); - double fac = (shapes[vi1]+shapes[vi2]) / (1-z); + T fac = (shapes[vi1]+shapes[vi2]) / (1-z); for (int j = 0; j < eorder-1; j++) shapes(ii+j) *= fac; @@ -2586,9 +2592,9 @@ namespace netgen case HEX: { shapes = 0.0; - double x = xi(0); - double y = xi(1); - double z = xi(2); + T x = xi(0); + T y = xi(1); + T z = xi(2); shapes[0] = (1-x)*(1-y)*(1-z); shapes[1] = x *(1-y)*(1-z); @@ -2601,7 +2607,7 @@ namespace netgen if (info.order == 1) return; - double mu[8] = { + T mu[8] = { (1-x)+(1-y)+(1-z), x +(1-y)+(1-z), x + y +(1-z), @@ -2624,7 +2630,7 @@ namespace netgen if (el[vi1] > el[vi2]) swap (vi1, vi2); CalcEdgeShape (eorder, mu[vi1]-mu[vi2], &shapes(ii)); - double lame = shapes(vi1)+shapes(vi2); + T lame = shapes(vi1)+shapes(vi2); for (int j = 0; j < order-1; j++) shapes(ii+j) *= lame; ii += eorder-1; @@ -2642,24 +2648,27 @@ namespace netgen } + template void CurvedElements :: - CalcElementDShapes (ElementInfo & info, const Point<3> & xi, MatrixFixWidth<3> & dshapes) const + CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const { + // static int timer = NgProfiler::CreateTimer ("calcelementdshapes"); + const Element & el = mesh[info.elnr]; - dshapes.SetSize(info.ndof); - dshapes = 0.0; - - - + // dshapes.SetSize(info.ndof); + if ( (long int)(&dshapes(0,0)) % alignof(T) != 0) + throw NgException ("alignment problem"); + if (dshapes.Height() != info.ndof) + throw NgException ("wrong height"); if (rational && info.order >= 2) { - double w = 1; - double dw[3] = { 0, 0, 0 }; + T w = 1; + T dw[3] = { 0, 0, 0 }; - double lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; - double dlami[4][3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { -1, -1, -1 }}; - double shapes[10]; + T lami[4] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; + T dlami[4][3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { -1, -1, -1 }}; + T shapes[10]; for (int j = 0; j < 4; j++) { @@ -2672,7 +2681,7 @@ namespace netgen const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET); for (int j = 0; j < 6; j++) { - double wi = edgeweight[info.edgenrs[j]]; + T wi = edgeweight[info.edgenrs[j]]; shapes[j+4] = 2 * wi * lami[edges[j][0]-1] * lami[edges[j][1]-1]; for (int k = 0; k < 3; k++) @@ -2696,6 +2705,8 @@ namespace netgen { case TET: { + dshapes = T(0.0); + dshapes(0,0) = 1; dshapes(1,1) = 1; dshapes(2,2) = 1; @@ -2705,7 +2716,7 @@ namespace netgen if (info.order == 1) return; - double lami[] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; + T lami[] = { xi(0), xi(1), xi(2), 1-xi(0)-xi(1)-xi(2) }; int ii = 4; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET); for (int i = 0; i < 6; i++) @@ -2718,7 +2729,7 @@ namespace netgen CalcScaledEdgeShapeDxDt<3> (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0)); - Mat<2,3> trans; + Mat<2,3,T> trans; for (int j = 0; j < 3; j++) { trans(0,j) = dshapes(vi1,j)-dshapes(vi2,j); @@ -2727,8 +2738,8 @@ namespace netgen for (int j = 0; j < order-1; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); dshapes(ii+j,2) = ddx * trans(0,2) + ddt * trans(1,2); @@ -2754,7 +2765,7 @@ namespace netgen lami[fnums[2]], lami[fnums[0]]+lami[fnums[1]]+lami[fnums[2]], &dshapes(ii,0)); - Mat<3,3> trans; + Mat<3,3,T> trans; for (int j = 0; j < 3; j++) { trans(0,j) = dshapes(fnums[1],j)-dshapes(fnums[0],j); @@ -2765,9 +2776,9 @@ namespace netgen int nfd = (forder-1)*(forder-2)/2; for (int j = 0; j < nfd; j++) { - double ddx = dshapes(ii+j,0); - double ddy = dshapes(ii+j,1); - double ddt = dshapes(ii+j,2); + T ddx = dshapes(ii+j,0); + T ddy = dshapes(ii+j,1); + T ddt = dshapes(ii+j,2); dshapes(ii+j,0) = ddx * trans(0,0) + ddy * trans(1,0) + ddt * trans(2,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddy * trans(1,1) + ddt * trans(2,1); dshapes(ii+j,2) = ddx * trans(0,2) + ddy * trans(1,2) + ddt * trans(2,2); @@ -2784,7 +2795,7 @@ namespace netgen { if (dshapes.Height() == 4) { - dshapes = 0.0; + dshapes = T(0.0); dshapes(0,0) = 1; dshapes(1,1) = 1; @@ -2795,11 +2806,11 @@ namespace netgen } else { - AutoDiff<3> x(xi(0), 0); - AutoDiff<3> y(xi(1), 1); - AutoDiff<3> z(xi(2), 2); - AutoDiff<3> lam4 = 1-x-y-z; - AutoDiff<3> shapes[10]; + AutoDiff<3,T> x(xi(0), 0); + AutoDiff<3,T> y(xi(1), 1); + AutoDiff<3,T> z(xi(2), 2); + AutoDiff<3,T> lam4 = 1-x-y-z; + AutoDiff<3,T> shapes[10]; shapes[0] = 2 * x * x - x; shapes[1] = 2 * y * y - y; @@ -2829,10 +2840,10 @@ namespace netgen case PRISM: { - double lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; - double lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; - double dlamiz[6] = { -1, -1, -1, 1, 1, 1 }; - double dlami[6][2] = + T lami[6] = { xi(0), xi(1), 1-xi(0)-xi(1), xi(0), xi(1), 1-xi(0)-xi(1) }; + T lamiz[6] = { 1-xi(2), 1-xi(2), 1-xi(2), xi(2), xi(2), xi(2) }; + T dlamiz[6] = { -1, -1, -1, 1, 1, 1 }; + T dlami[6][2] = { { 1, 0, }, { 0, 1, }, { -1, -1 }, @@ -2863,11 +2874,12 @@ namespace netgen vi1 = vi1 % 3; vi2 = vi2 % 3; - Vector shapei(order+1); + ArrayMem shapei_mem(order+1); + TFlatVector shapei(order+1, &shapei_mem[0]); CalcScaledEdgeShapeDxDt<3> (order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0) ); CalcScaledEdgeShape(order, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &shapei(0) ); - Mat<2,2> trans; + Mat<2,2,T> trans; for (int j = 0; j < 2; j++) { trans(0,j) = dlami[vi1][j]-dlami[vi2][j]; @@ -2876,16 +2888,16 @@ namespace netgen for (int j = 0; j < order-1; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); } - double facz = (i < 3) ? (1-xi(2)) : xi(2); - double dfacz = (i < 3) ? (-1) : 1; + T facz = (i < 3) ? (1-xi(2)) : xi(2); + T dfacz = (i < 3) ? (-1) : 1; for (int j = 0; j < order-1; j++) { dshapes(ii+j,0) *= facz; @@ -2905,13 +2917,13 @@ namespace netgen int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1); if (el[vi1] > el[vi2]) swap (vi1, vi2); - double bubz = lamiz[vi1] * lamiz[vi2]; - double dbubz = dlamiz[vi1]*lamiz[vi2] + lamiz[vi1]*dlamiz[vi2]; - double polyz = lamiz[vi1] - lamiz[vi2]; - double dpolyz = dlamiz[vi1] - dlamiz[vi2]; - double bubxy = lami[(vi1)%3]; - double dbubxydx = dlami[(vi1)%3][0]; - double dbubxydy = dlami[(vi1)%3][1]; + T bubz = lamiz[vi1] * lamiz[vi2]; + T dbubz = dlamiz[vi1]*lamiz[vi2] + lamiz[vi1]*dlamiz[vi2]; + T polyz = lamiz[vi1] - lamiz[vi2]; + T dpolyz = dlamiz[vi1] - dlamiz[vi2]; + T bubxy = lami[(vi1)%3]; + T dbubxydx = dlami[(vi1)%3][0]; + T dbubxydy = dlami[(vi1)%3][1]; for (int j = 0; j < eorder-1; j++) { @@ -2942,8 +2954,10 @@ namespace netgen if(el[fav[1]] > el[fav[2]]) swap(fav[1],fav[2]); if(el[fav[0]] > el[fav[1]]) swap(fav[0],fav[1]); - MatrixFixWidth<2> dshapei(ndf); - Vector shapei(ndf); + ArrayMem dshapei_mem(ndf); + ArrayMem shapei_mem(ndf); + MatrixFixWidth<2,T> dshapei(ndf, &dshapei_mem[0]); + TFlatVector shapei(ndf, &shapei_mem[0]); CalcTrigShapeDxDy (forder, lami[fav[2]]-lami[fav[1]], lami[fav[0]], @@ -2951,7 +2965,7 @@ namespace netgen CalcTrigShape (forder, lami[fav[2]]-lami[fav[1]], lami[fav[0]], &shapei(0)); - Mat<2,2> trans; + Mat<2,2,T> trans; for (int j = 0; j < 2; j++) { trans(0,j) = dlami[fav[2]][j]-dlami[fav[1]][j]; @@ -2962,8 +2976,8 @@ namespace netgen { // double ddx = dshapes(ii+j,0); // double ddt = dshapes(ii+j,1); - double ddx = dshapei(j,0); - double ddt = dshapei(j,1); + T ddx = dshapei(j,0); + T ddt = dshapei(j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); } @@ -2983,14 +2997,15 @@ namespace netgen case PYRAMID: { - dshapes = 0.0; - double x = xi(0); - double y = xi(1); - double z = xi(2); + dshapes = T(0.0); + T x = xi(0); + T y = xi(1); + T z = xi(2); - if (z == 1.) z = 1-1e-10; - double z1 = 1-z; - double z2 = z1*z1; + // if (z == 1.) z = 1-1e-10; + z *= 1-1e-12; + T z1 = 1-z; + T z2 = z1*z1; dshapes(0,0) = -(z1-y)/z1; dshapes(0,1) = -(z1-x)/z1; @@ -3016,29 +3031,30 @@ namespace netgen int ii = 5; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (PYRAMID); - if (z == 1.) z = 1-1e-10; - double shapes[5]; + // if (z == 1.) z = 1-1e-10; + z *= 1-1e-12; + T shapes[5]; shapes[0] = (1-z-x)*(1-z-y) / (1-z); shapes[1] = x*(1-z-y) / (1-z); shapes[2] = x*y / (1-z); shapes[3] = (1-z-x)*y / (1-z); shapes[4] = z; - double sigma[4] = + T sigma[4] = { ( (1-z-x) + (1-z-y) ), ( x + (1-z-y) ), ( x + y ), ( (1-z-x) + y ), }; - double dsigma[4][3] = + T dsigma[4][3] = { { -1, -1, -2 }, { 1, -1, -1 }, { 1, 1, 0 }, { -1, 1, -1 } }; - double dz[3] = { 0, 0, 1 }; + T dz[3] = { 0, 0, 1 }; for (int i = 0; i < 4; i++) // horizontal edges { int eorder = edgeorder[info.edgenrs[i]]; @@ -3047,11 +3063,12 @@ namespace netgen int vi1 = (edges[i][0]-1), vi2 = (edges[i][1]-1); if (el[vi1] > el[vi2]) swap (vi1, vi2); - Vector shapei(eorder+1); + ArrayMem shapei_mem(eorder+1); + TFlatVector shapei(eorder+1,&shapei_mem[0]); CalcScaledEdgeShapeDxDt<3> (eorder, sigma[vi1]-sigma[vi2], 1-z, &dshapes(ii,0) ); CalcScaledEdgeShape(eorder, sigma[vi1]-sigma[vi2], 1-z, &shapei(0) ); - double fac = (shapes[vi1]+shapes[vi2]) / (1-z); - double dfac[3]; + T fac = (shapes[vi1]+shapes[vi2]) / (1-z); + T dfac[3]; for (int k = 0; k < 3; k++) dfac[k] = ( (dshapes(vi1,k)+dshapes(vi2,k)) * (1-z) - (shapes[vi1]+shapes[vi2]) *(-dshapes(4,k)) ) @@ -3059,8 +3076,8 @@ namespace netgen for (int j = 0; j < eorder-1; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); for (int k = 0; k < 3; k++) dshapes(ii+j,k) = fac * (ddx * (dsigma[vi1][k]-dsigma[vi2][k]) - ddt*dz[k]) + dfac[k] * shapei(j); @@ -3075,11 +3092,10 @@ namespace netgen case HEX: { - dshapes = 0.0; - - double x = xi(0); - double y = xi(1); - double z = xi(2); + // NgProfiler::StartTimer(timer); + T x = xi(0); + T y = xi(1); + T z = xi(2); // shapes[0] = (1-x)*(1-y)*(1-z); dshapes(0,0) = - (1-y)*(1-z); @@ -3121,10 +3137,11 @@ namespace netgen dshapes(7,1) = (1-x) * z; dshapes(7,2) = (1-x) * y; + // NgProfiler::StopTimer(timer); if (info.order == 1) return; - double shapes[8] = { + T shapes[8] = { (1-x)*(1-y)*(1-z), x *(1-y)*(1-z), x * y *(1-z), @@ -3135,7 +3152,7 @@ namespace netgen (1-x)* y *(z), }; - double mu[8] = { + T mu[8] = { (1-x)+(1-y)+(1-z), x +(1-y)+(1-z), x + y +(1-z), @@ -3146,7 +3163,7 @@ namespace netgen (1-x)+ y +(z) }; - double dmu[8][3] = { + T dmu[8][3] = { { -1, -1, -1 }, { 1, -1, -1 }, { 1, 1, -1 }, @@ -3157,7 +3174,7 @@ namespace netgen { -1, 1, 1 } }; - ArrayMem hshapes(order+1), hdshapes(order+1); + ArrayMem hshapes(order+1), hdshapes(order+1); int ii = 8; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (HEX); @@ -3172,8 +3189,8 @@ namespace netgen CalcEdgeShapeDx (eorder, mu[vi1]-mu[vi2], &hshapes[0], &hdshapes[0]); - double lame = shapes[vi1]+shapes[vi2]; - double dlame[3] = { + T lame = shapes[vi1]+shapes[vi2]; + T dlame[3] = { dshapes(vi1, 0) + dshapes(vi2, 0), dshapes(vi1, 1) + dshapes(vi2, 1), dshapes(vi1, 2) + dshapes(vi2, 2) @@ -3779,36 +3796,43 @@ namespace netgen + template void CurvedElements :: CalcMultiPointElementTransformation (ElementIndex elnr, int n, - const double * xi, size_t sxi, - double * x, size_t sx, - double * dxdxi, size_t sdxdxi) + const T * xi, size_t sxi, + T * x, size_t sx, + T * dxdxi, size_t sdxdxi) { - // static int timer = NgProfiler::CreateTimer ("calcmultipointtrafo, calcshape"); - + static int timer = NgProfiler::CreateTimer ("calcmultipointelementtrafo"); + static int timer1 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 1"); + static int timer2 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 2"); + static int timer3 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 3"); + static int timer4 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 4"); + static int timer5 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 5"); + // NgProfiler::StartTimer (timer); + // NgProfiler::StartTimer (timer1); if (mesh.coarsemesh) { const HPRefElement & hpref_el = (*mesh.hpelements) [mesh[elnr].hp_elnr]; // xi umrechnen - double lami[8]; - FlatVector vlami(8, lami); + T lami[8]; + TFlatVector vlami(8, &lami[0]); - ArrayMem coarse_xi (3*n); + ArrayMem coarse_xi (3*n); for (int pi = 0; pi < n; pi++) { vlami = 0; - Point<3> pxi; + Point<3,T> pxi; for (int j = 0; j < 3; j++) pxi(j) = xi[pi*sxi+j]; - mesh[elnr].GetShapeNew ( pxi, vlami); + mesh[elnr].GetShapeNew (pxi, vlami); - Point<3> cxi(0,0,0); + Point<3,T> cxi(0,0,0); for (int i = 0; i < hpref_el.np; i++) for (int j = 0; j < 3; j++) cxi(j) += hpref_el.param[i][j] * lami[i]; @@ -3823,15 +3847,15 @@ namespace netgen x, sx, dxdxi, sdxdxi); - Mat<3,3> trans, dxdxic; + Mat<3,3,T> trans, dxdxic; if (dxdxi) { - MatrixFixWidth<3> dlami(8); - dlami = 0; + MatrixFixWidth<3,T> dlami(8); + dlami = T(0); for (int pi = 0; pi < n; pi++) { - Point<3> pxi; + Point<3,T> pxi; for (int j = 0; j < 3; j++) pxi(j) = xi[pi*sxi+j]; @@ -3843,7 +3867,7 @@ namespace netgen for (int i = 0; i < hpref_el.np; i++) trans(l,k) += hpref_el.param[i][l] * dlami(i, k); - Mat<3> mat_dxdxic, mat_dxdxi; + Mat<3,3,T> mat_dxdxic, mat_dxdxi; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) mat_dxdxic(j,k) = dxdxi[pi*sdxdxi+3*j+k]; @@ -3863,10 +3887,8 @@ namespace netgen - - - - MatrixFixWidth<3> dshapes; + // NgProfiler::StopTimer (timer1); + // NgProfiler::StartTimer (timer2); const Element & el = mesh[elnr]; @@ -3896,29 +3918,43 @@ namespace netgen // info.ndof += facecoeffsindex[info.facenr+1] - facecoeffsindex[info.facenr]; } + // NgProfiler::StopTimer (timer2); + // NgProfiler::StartTimer (timer3); + + ArrayMem,100> coefs(info.ndof); - ArrayMem shapes_mem(info.ndof); - Vector shapes(info.ndof, &shapes_mem[0]); + ArrayMem shapes_mem(info.ndof); + + TFlatVector shapes(info.ndof, &shapes_mem[0]); + + ArrayMem dshapes_mem(3*info.ndof); + MatrixFixWidth<3,T> dshapes(info.ndof, &dshapes_mem[0]); + + // NgProfiler::StopTimer (timer3); + // NgProfiler::StartTimer (timer4); GetCoefficients (info, &coefs[0]); if (x) { for (int j = 0; j < n; j++) { - Point<3> xij, xj; + Point<3,T> xij, xj; for (int k = 0; k < 3; k++) xij(k) = xi[j*sxi+k]; - CalcElementShapes (info, xij, shapes); - xj = 0; + xj = T(0.0); for (int i = 0; i < coefs.Size(); i++) - xj += shapes(i) * coefs[i]; + for (int k = 0; k < 3; k++) + xj(k) += shapes(i) * coefs[i](k); for (int k = 0; k < 3; k++) x[j*sx+k] = xj(k); } } + // NgProfiler::StopTimer (timer4); + // NgProfiler::StartTimer (timer5); + if (dxdxi) { if (info.order == 1 && type == TET) @@ -3926,13 +3962,13 @@ namespace netgen if (n > 0) { - Point<3> xij; + Point<3,T> xij; for (int k = 0; k < 3; k++) xij(k) = xi[k]; CalcElementDShapes (info, xij, dshapes); - Mat<3> dxdxij; + Mat<3,3,T> dxdxij; dxdxij = 0.0; for (int i = 0; i < coefs.Size(); i++) for (int j = 0; j < 3; j++) @@ -3950,36 +3986,94 @@ namespace netgen { for (int ip = 0; ip < n; ip++) { - Point<3> xij; + Point<3,T> xij; for (int k = 0; k < 3; k++) xij(k) = xi[ip*sxi+k]; - + + // just for testing ... + dshapes = T(0.0); + const Element & el = mesh[info.elnr]; + if (el.GetType() != HEX) + throw NgException ("not a hex"); CalcElementDShapes (info, xij, dshapes); - - Mat<3> dxdxij; + + /* + Mat<3,3,T> dxdxij; dxdxij = 0.0; for (int i = 0; i < coefs.Size(); i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) dxdxij(j,k) += dshapes(i,k) * coefs[i](j); - - + for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) dxdxi[ip*sdxdxi+3*j+k] = dxdxij(j,k); + */ + + T dxdxi00 = T(0.0); + T dxdxi01 = T(0.0); + T dxdxi02 = T(0.0); + T dxdxi10 = T(0.0); + T dxdxi11 = T(0.0); + T dxdxi12 = T(0.0); + T dxdxi20 = T(0.0); + T dxdxi21 = T(0.0); + T dxdxi22 = T(0.0); + + for (int i = 0; i < coefs.Size(); i++) + { + T ds0 = dshapes(i,0); + T ds1 = dshapes(i,1); + T ds2 = dshapes(i,2); + T cf0 = coefs[i](0); + T cf1 = coefs[i](1); + T cf2 = coefs[i](2); + + dxdxi00 += ds0*cf0; + dxdxi01 += ds1*cf0; + dxdxi02 += ds2*cf0; + dxdxi10 += ds0*cf1; + dxdxi11 += ds1*cf1; + dxdxi12 += ds2*cf1; + dxdxi20 += ds0*cf2; + dxdxi21 += ds1*cf2; + dxdxi22 += ds2*cf2; + } + + dxdxi[ip*sdxdxi+3*0+0] = dxdxi00; + dxdxi[ip*sdxdxi+3*0+1] = dxdxi01; + dxdxi[ip*sdxdxi+3*0+2] = dxdxi02; + + dxdxi[ip*sdxdxi+3*1+0] = dxdxi10; + dxdxi[ip*sdxdxi+3*1+1] = dxdxi11; + dxdxi[ip*sdxdxi+3*1+2] = dxdxi12; + + dxdxi[ip*sdxdxi+3*2+0] = dxdxi20; + dxdxi[ip*sdxdxi+3*2+1] = dxdxi21; + dxdxi[ip*sdxdxi+3*2+2] = dxdxi22; } } } + + // NgProfiler::StopTimer (timer5); + // NgProfiler::StopTimer (timer); } + template + void CurvedElements :: + CalcMultiPointElementTransformation + (ElementIndex elnr, int n, + const double * xi, size_t sxi, + double * x, size_t sx, + double * dxdxi, size_t sdxdxi); - - - - - + template + void CurvedElements :: + CalcMultiPointElementTransformation + (ElementIndex elnr, int n, + const SIMD * xi, size_t sxi, + SIMD * x, size_t sx, + SIMD * dxdxi, size_t sdxdxi); }; - - diff --git a/libsrc/meshing/curvedelems.hpp b/libsrc/meshing/curvedelems.hpp index 9704374d..a30c31ae 100644 --- a/libsrc/meshing/curvedelems.hpp +++ b/libsrc/meshing/curvedelems.hpp @@ -141,10 +141,11 @@ public: Array< Point<3> > * x, Array< Mat<3,3> > * dxdxi); + template void CalcMultiPointElementTransformation (ElementIndex elnr, int n, - const double * xi, size_t sxi, - double * x, size_t sx, - double * dxdxi, size_t sdxdxi); + const T * xi, size_t sxi, + T * x, size_t sx, + T * dxdxi, size_t sdxdxi); @@ -196,10 +197,11 @@ private: Vec<3> hcoefs[10]; // enough for second order tets }; - - void CalcElementShapes (ElementInfo & info, const Point<3> & xi, Vector & shapes) const; + template + void CalcElementShapes (ElementInfo & info, Point<3,T> xi, TFlatVector shapes) const; void GetCoefficients (ElementInfo & info, Vec<3> * coefs) const; - void CalcElementDShapes (ElementInfo & info, const Point<3> & xi, MatrixFixWidth<3> & dshapes) const; + template + void CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const; class SurfaceElementInfo diff --git a/libsrc/meshing/meshtype.cpp b/libsrc/meshing/meshtype.cpp index b6eb599f..0b0720a8 100644 --- a/libsrc/meshing/meshtype.cpp +++ b/libsrc/meshing/meshtype.cpp @@ -1965,9 +1965,9 @@ namespace netgen } } - + template void Element :: - GetDShapeNew (const Point<3> & p, MatrixFixWidth<3> & dshape) const + GetDShapeNew (const Point<3,T> & p, MatrixFixWidth<3,T> & dshape) const { switch (typ) { diff --git a/libsrc/meshing/meshtype.hpp b/libsrc/meshing/meshtype.hpp index 37b08460..19056e21 100644 --- a/libsrc/meshing/meshtype.hpp +++ b/libsrc/meshing/meshtype.hpp @@ -125,7 +125,6 @@ namespace netgen PointIndex operator-- (int) { PointIndex hi(*this); i--; return hi; } PointIndex operator++ () { i++; return *this; } PointIndex operator-- () { i--; return *this; } - #ifdef BASE0 enum { BASE = 0 }; #else @@ -718,9 +717,12 @@ namespace netgen void GetShape (const Point<3> & p, class Vector & shape) const; void GetShapeNew (const Point<3> & p, class FlatVector & shape) const; + template + void GetShapeNew (const Point<3,T> & p, TFlatVector shape) const; /// matrix 2 * np void GetDShape (const Point<3> & p, class DenseMatrix & dshape) const; - void GetDShapeNew (const Point<3> & p, class MatrixFixWidth<3> & dshape) const; + template + void GetDShapeNew (const Point<3,T> & p, class MatrixFixWidth<3,T> & dshape) const; /// matrix 3 * np void GetPointMatrix (const T_POINTS & points, class DenseMatrix & pmat) const; From ffb5a8c8da30e13f76fbfd1ae722525f142f9007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Thu, 7 Jul 2016 15:28:27 +0200 Subject: [PATCH 02/10] evaluate curved element mapping without storing shape functions --- libsrc/gprim/geomops.hpp | 12 +- libsrc/interface/nginterface_v2.cpp | 1 - libsrc/meshing/curvedelems.cpp | 347 ++++++++++++++++++++++++---- libsrc/meshing/curvedelems.hpp | 2 + 4 files changed, 309 insertions(+), 53 deletions(-) diff --git a/libsrc/gprim/geomops.hpp b/libsrc/gprim/geomops.hpp index 45614c6b..4648e685 100644 --- a/libsrc/gprim/geomops.hpp +++ b/libsrc/gprim/geomops.hpp @@ -348,8 +348,8 @@ namespace netgen - template - inline ostream & operator<< (ostream & ost, const Vec & a) + template + inline ostream & operator<< (ostream & ost, const Vec & a) { ost << "("; for (int i = 0; i < D-1; i++) @@ -358,8 +358,8 @@ namespace netgen return ost; } - template - inline ostream & operator<< (ostream & ost, const Point & a) + template + inline ostream & operator<< (ostream & ost, const Point & a) { ost << "("; for (int i = 0; i < D-1; i++) @@ -375,8 +375,8 @@ namespace netgen return ost; } - template - inline ostream & operator<< (ostream & ost, const Mat & m) + template + inline ostream & operator<< (ostream & ost, const Mat & m) { ost << "("; for (int i = 0; i < H; i++) diff --git a/libsrc/interface/nginterface_v2.cpp b/libsrc/interface/nginterface_v2.cpp index 56e34ce2..1b9b3eee 100644 --- a/libsrc/interface/nginterface_v2.cpp +++ b/libsrc/interface/nginterface_v2.cpp @@ -650,7 +650,6 @@ namespace netgen reinterpret_cast*> (xi), sxi, reinterpret_cast*> (x), sx, reinterpret_cast*> (dxdxi), sdxdxi); - /* for (int i = 0; i < npts; i++) { diff --git a/libsrc/meshing/curvedelems.cpp b/libsrc/meshing/curvedelems.cpp index f5db4038..f0763cdd 100644 --- a/libsrc/meshing/curvedelems.cpp +++ b/libsrc/meshing/curvedelems.cpp @@ -59,7 +59,19 @@ namespace netgen shape[j-2] = p1; } } + template + static void CalcEdgeShapeLambda (int n, T x, FUNC func) + { + T p1(x), p2(-1.0), p3(0.0); + for (int j=2; j<=n; j++) + { + p3=p2; p2=p1; + p1=( (2*j-3) * x * p2 - (j-3) * p3) / j; + func(j-2, p1); + } + } + static void CalcEdgeDx (int n, double x, double * dshape) { double p1 = x, p2 = -1, p3 = 0; @@ -122,7 +134,35 @@ namespace netgen shape[j] = p1; } } + + template + static void CalcScaledEdgeShapeLambda (int n, T x, T t, FUNC func) + { + static bool init = false; + static double coefs[100][2]; + if (!init) + { + for (int j = 0; j < 100; j++) + { + coefs[j][0] = double(2*j+1)/(j+2); + coefs[j][1] = -double(j-1)/(j+2); + } + init = true; + } + T p1(x), p2(-1.0), p3(0.0); + T tt = t*t; + for (int j=0; j<=n-2; j++) + { + p3=p2; p2=p1; + p1= coefs[j][0] * x * p2 + coefs[j][1] * tt*p3; + // p1=( (2*j+1) * x * p2 - t*t*(j-1) * p3) / (j+2); + func(j, p1); + } + } + + + template static void CalcScaledEdgeShapeDxDt (int n, T x, T t, T * dshape) { @@ -346,26 +386,12 @@ namespace netgen // compute face bubbles up to order n, 0 < y, y-x < 1, x+y < 1 template static void CalcTrigShape (int n, Tx x, Ty y, Ts * shape) - { - // static int timer = NgProfiler::CreateTimer ("Curvedels - CalcTrigShape"); - // NgProfiler::RegionTimer reg (timer); - + { + // cout << "calc trig shape" << endl; if (n < 3) return; Tx hx[50], hy[50*50]; - // ScaledJacobiPolynomial (n-3, x, 1-y, 2, 2, hx); - /* - cout << "scaled jacobi, old: " << endl; - for (int i = 0; i <= n-3; i++) - cout << i << ": " << hx[i] << endl; - */ + jacpols2[2] -> EvaluateScaled (n-3, x, 1-y, hx); - /* - cout << "scaled jacobi, new: " << endl; - for (int i = 0; i <= n-3; i++) - cout << i << ": " << hx[i] << endl; - */ - // for (int ix = 0; ix <= n-3; ix++) - // JacobiPolynomial (n-3, 2*y-1, 2*ix+5, 2, hy+50*ix); for (int ix = 0; ix <= n-3; ix++) jacpols2[2*ix+5] -> Evaluate (n-3, 2*y-1, hy+50*ix); @@ -376,9 +402,15 @@ namespace netgen for (int ix = 0; ix <= n-3; ix++) hx[ix] *= bub; + /* for (int iy = 0; iy <= n-3; iy++) for (int ix = 0; ix <= n-3-iy; ix++) shape[ii++] = hx[ix]*hy[iy+50*ix]; + */ + // change loops: + for (int ix = 0; ix <= n-3; ix++) + for (int iy = 0; iy <= n-3-ix; iy++) + shape[ii++] = hx[ix]*hy[iy+50*ix]; } template @@ -405,28 +437,43 @@ namespace netgen { if (n < 3) return; - Tx hx[50], hy[50*50]; - // ScaledLegendrePolynomial (n-3, (2*x-1), t-y, hx); + Tx hx[50], hy[50]; ScaledJacobiPolynomial (n-3, x, t-y, 2, 2, hx); - // ScaledLegendrePolynomial (n-3, (2*y-1), t, hy); - for (int ix = 0; ix <= n-3; ix++) - jacpols2[2*ix+5] -> EvaluateScaled (n-3, 2*y-1, t, hy+50*ix); - // ScaledJacobiPolynomial (n-3, 2*y-1, t, 2*ix+5, 2, hy+50*ix); - - int ii = 0; Tx bub = (t+x-y)*y*(t-x-y); - for (int iy = 0; iy <= n-3; iy++) - for (int ix = 0; ix <= n-3-iy; ix++) - shape[ii++] = bub * hx[ix]*hy[iy+50*ix]; + for (int ix = 0; ix <= n-3; ix++) + { + jacpols2[2*ix+5] -> EvaluateScaled (n-3, 2*y-1, t, hy); + for (int iy = 0; iy <= n-3-ix; iy++) + shape[ii++] = bub * hx[ix]*hy[iy]; + } } + template + static void CalcScaledTrigShapeLambda (int n, Tx x, Ty y, Tt t, FUNC func) + { + if (n < 3) return; + Tx hx[50], hy[50]; + ScaledJacobiPolynomial (n-3, x, t-y, 2, 2, hx); + + int ii = 0; + Tx bub = (t+x-y)*y*(t-x-y); + for (int ix = 0; ix <= n-3; ix++) + { + jacpols2[2*ix+5] -> EvaluateScaled (n-3, 2*y-1, t, hy); + for (int iy = 0; iy <= n-3-ix; iy++) + func(ii++, bub * hx[ix]*hy[iy]); + } + } + + // compute face bubbles up to order n, 0 < y, y-x < 1, x+y < 1 template static void CalcScaledTrigShapeDxDyDt (int n, T x, T y, T t, T * dshape) { + /* if (n < 3) return; AutoDiff<3,T> adx(x, 0); AutoDiff<3,T> ady(y, 1); @@ -440,6 +487,18 @@ namespace netgen dshape[3*i+1] = res[i].DValue(1); dshape[3*i+2] = res[i].DValue(2); } + */ + if (n < 3) return; + AutoDiff<3,T> adx(x, 0); + AutoDiff<3,T> ady(y, 1); + AutoDiff<3,T> adt(t, 2); + CalcScaledTrigShapeLambda (n, adx, ady, adt, + [&] (int i, AutoDiff<3,T> shape) + { + dshape[3*i] = shape.DValue(0); + dshape[3*i+1] = shape.DValue(1); + dshape[3*i+2] = shape.DValue(2); + }); } @@ -2701,10 +2760,20 @@ namespace netgen return; } + /* + if (typeid(T) == typeid(SIMD)) + { + if (el.GetType() == HEX) + dshapes = T(0.0); + return; + } + */ switch (el.GetType()) { case TET: { + // if (typeid(T) == typeid(SIMD)) return; + dshapes = T(0.0); dshapes(0,0) = 1; @@ -2793,6 +2862,8 @@ namespace netgen case TET10: { + // if (typeid(T) == typeid(SIMD)) return; + if (dshapes.Height() == 4) { dshapes = T(0.0); @@ -2862,7 +2933,7 @@ namespace netgen if (info.order == 1) return; - + const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (PRISM); for (int i = 0; i < 6; i++) // horizontal edges { @@ -2909,6 +2980,9 @@ namespace netgen } } + if (typeid(T) == typeid(SIMD)) return; + + for (int i = 6; i < 9; i++) // vertical edges { int eorder = edgeorder[info.edgenrs[i]]; @@ -2954,8 +3028,8 @@ namespace netgen if(el[fav[1]] > el[fav[2]]) swap(fav[1],fav[2]); if(el[fav[0]] > el[fav[1]]) swap(fav[0],fav[1]); - ArrayMem dshapei_mem(ndf); - ArrayMem shapei_mem(ndf); + ArrayMem dshapei_mem(ndf); + ArrayMem shapei_mem(ndf); MatrixFixWidth<2,T> dshapei(ndf, &dshapei_mem[0]); TFlatVector shapei(ndf, &shapei_mem[0]); @@ -2997,6 +3071,8 @@ namespace netgen case PYRAMID: { + if (typeid(T) == typeid(SIMD)) return; + dshapes = T(0.0); T x = xi(0); T y = xi(1); @@ -3092,6 +3168,8 @@ namespace netgen case HEX: { + // if (typeid(T) == typeid(SIMD)) return; + // NgProfiler::StartTimer(timer); T x = xi(0); T y = xi(1); @@ -3261,7 +3339,165 @@ namespace netgen } + template + bool CurvedElements :: + EvaluateMapping (ElementInfo & info, Point<3,T> xi, Point<3,T> & mx, Mat<3,3,T> & jac) const + { + const Element & el = mesh[info.elnr]; + if (rational && info.order >= 2) return false; // not supported + AutoDiff<3,T> x(xi(0), 0); + AutoDiff<3,T> y(xi(1), 1); + AutoDiff<3,T> z(xi(2), 2); + + AutoDiff<3,T> mapped_x[3] = { T(0.0), T(0.0), T(0.0) } ; + + switch (el.GetType()) + { + case TET: + { + // if (info.order >= 2) return false; // not yet supported + AutoDiff<3,T> lami[4] = { x, y, z, 1-x-y-z }; + for (int j = 0; j < 4; j++) + { + Point<3> p = mesh[el[j]]; + for (int k = 0; k < 3; k++) + mapped_x[k] += p(k) * lami[j]; + } + + const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TET); + for (int i = 0; i < 6; i++) + { + int eorder = edgeorder[info.edgenrs[i]]; + if (eorder >= 2) + { + int first = edgecoeffsindex[info.edgenrs[i]]; + + int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1; + if (el[vi1] > el[vi2]) swap (vi1, vi2); + + CalcScaledEdgeShapeLambda (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], + [&](int i, AutoDiff<3,T> shape) + { + Vec<3> coef = edgecoeffs[first+i]; + for (int k = 0; k < 3; k++) + mapped_x[k] += coef(k) * shape; + }); + } + } + + const ELEMENT_FACE * faces = MeshTopology::GetFaces1 (TET); + for (int i = 0; i < 4; i++) + { + int forder = faceorder[info.facenrs[i]]; + if (forder >= 3) + { + int first = facecoeffsindex[info.facenrs[i]]; + + int fnums[] = { faces[i][0]-1, faces[i][1]-1, faces[i][2]-1 }; + if (el[fnums[0]] > el[fnums[1]]) swap (fnums[0], fnums[1]); + if (el[fnums[1]] > el[fnums[2]]) swap (fnums[1], fnums[2]); + if (el[fnums[0]] > el[fnums[1]]) swap (fnums[0], fnums[1]); + + CalcScaledTrigShapeLambda (forder, + lami[fnums[1]]-lami[fnums[0]], lami[fnums[2]], + lami[fnums[0]]+lami[fnums[1]]+lami[fnums[2]], + [&](int i, AutoDiff<3,T> shape) + { + Vec<3> coef = facecoeffs[first+i]; + for (int k = 0; k < 3; k++) + mapped_x[k] += coef(k) * shape; + }); + } + } + + break; + } + case HEX: + { + if (info.order >= 2) return false; // not yet supported + AutoDiff<3,T> lami[8] = + { (1-x)*(1-y)*(1-z), + ( x)*(1-y)*(1-z), + ( x)* y *(1-z), + (1-x)* y *(1-z), + (1-x)*(1-y)*(z), + ( x)*(1-y)*(z), + ( x)* y *(z), + (1-x)* y *(z) }; + + for (int j = 0; j < 8; j++) + { + Point<3> p = mesh[el[j]]; + for (int k = 0; k < 3; k++) + mapped_x[k] += p(k) * lami[j]; + } + + if (info.order == 1) break; + + AutoDiff<3,T> mu[8] = { + (1-x)+(1-y)+(1-z), + x +(1-y)+(1-z), + x + y +(1-z), + (1-x)+ y +(1-z), + (1-x)+(1-y)+(z), + x +(1-y)+(z), + x + y +(z), + (1-x)+ y +(z), + }; + + int ii = 8; + const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (HEX); + + for (int i = 0; i < 8; i++) + { + int eorder = edgeorder[info.edgenrs[i]]; + if (eorder >= 2) + { + int first = edgecoeffsindex[info.edgenrs[i]]; + int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1; + if (el[vi1] > el[vi2]) swap (vi1, vi2); + + /* + CalcEdgeShape (eorder, mu[vi1]-mu[vi2], &shapes(ii)); + T lame = shapes(vi1)+shapes(vi2); + for (int j = 0; j < order-1; j++) + shapes(ii+j) *= lame; + */ + AutoDiff<3,T> lame = lami[vi1]+lami[vi2]; + CalcEdgeShapeLambda (eorder, mu[vi1]-mu[vi2], + [&](int i, AutoDiff<3,T> shape) + { + Vec<3> coef = edgecoeffs[first+i]; + for (int k = 0; k < 3; k++) + mapped_x[k] += coef(k) * (lame*shape); + }); + + } + } + + + break; + } + default: + return false; + } + + // for (int i = 0; i < 3; i++) + // cout << mapped_x[i] << endl; + for (int i = 0; i < 3; i++) + { + mx(i) = mapped_x[i].Value(); + for (int j = 0; j < 3; j++) + jac(i,j) = mapped_x[i].DValue(j); + } + return true; + } + + + + + void CurvedElements :: GetCoefficients (ElementInfo & info, Vec<3> * coefs) const { @@ -3809,6 +4045,7 @@ namespace netgen static int timer3 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 3"); static int timer4 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 4"); static int timer5 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 5"); + NgProfiler::RegionTimer reg(timer); // NgProfiler::StartTimer (timer); // NgProfiler::StartTimer (timer1); if (mesh.coarsemesh) @@ -3885,8 +4122,6 @@ namespace netgen return; } - - // NgProfiler::StopTimer (timer1); // NgProfiler::StartTimer (timer2); @@ -3920,14 +4155,34 @@ namespace netgen // NgProfiler::StopTimer (timer2); // NgProfiler::StartTimer (timer3); - + + /* + bool ok = true; + for (int i = 0; i < n; i++) + { + Point<3,T> _xi(xi[i*sxi], xi[i*sxi+1], xi[i*sxi+2]); + Point<3,T> _x; + Mat<3,3,T> _dxdxi; + if (!EvaluateMapping (info, _xi, _x, _dxdxi)) + { ok = false; break; } + // cout << "x = " << _x << ", dxdxi = " << _dxdxi << endl; + if (x) + for (int j = 0; j < 3; j++) + x[i*sx+j] = _x[j]; + if (dxdxi) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 3; k++) + dxdxi[i*sdxdxi+3*j+k] = _dxdxi(j,k); + } + if (ok) return; + */ ArrayMem,100> coefs(info.ndof); - ArrayMem shapes_mem(info.ndof); + ArrayMem shapes_mem(info.ndof); TFlatVector shapes(info.ndof, &shapes_mem[0]); - ArrayMem dshapes_mem(3*info.ndof); + ArrayMem dshapes_mem(3*info.ndof); MatrixFixWidth<3,T> dshapes(info.ndof, &dshapes_mem[0]); // NgProfiler::StopTimer (timer3); @@ -3947,11 +4202,14 @@ namespace netgen for (int k = 0; k < 3; k++) xj(k) += shapes(i) * coefs[i](k); + // cout << "old, xj = " << xj << endl; + for (int k = 0; k < 3; k++) x[j*sx+k] = xj(k); } } + // NgProfiler::StopTimer (timer4); // NgProfiler::StartTimer (timer5); @@ -3990,26 +4248,23 @@ namespace netgen for (int k = 0; k < 3; k++) xij(k) = xi[ip*sxi+k]; - // just for testing ... - dshapes = T(0.0); - const Element & el = mesh[info.elnr]; - if (el.GetType() != HEX) - throw NgException ("not a hex"); - CalcElementDShapes (info, xij, dshapes); + CalcElementDShapes (info, xij, dshapes); + - /* Mat<3,3,T> dxdxij; dxdxij = 0.0; for (int i = 0; i < coefs.Size(); i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) dxdxij(j,k) += dshapes(i,k) * coefs[i](j); + + // cout << "old, jac = " << dxdxij << endl; for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) dxdxi[ip*sdxdxi+3*j+k] = dxdxij(j,k); - */ - + + /* T dxdxi00 = T(0.0); T dxdxi01 = T(0.0); T dxdxi02 = T(0.0); @@ -4051,10 +4306,10 @@ namespace netgen dxdxi[ip*sdxdxi+3*2+0] = dxdxi20; dxdxi[ip*sdxdxi+3*2+1] = dxdxi21; dxdxi[ip*sdxdxi+3*2+2] = dxdxi22; + */ } } } - // NgProfiler::StopTimer (timer5); // NgProfiler::StopTimer (timer); } diff --git a/libsrc/meshing/curvedelems.hpp b/libsrc/meshing/curvedelems.hpp index a30c31ae..b10c5a58 100644 --- a/libsrc/meshing/curvedelems.hpp +++ b/libsrc/meshing/curvedelems.hpp @@ -203,6 +203,8 @@ private: template void CalcElementDShapes (ElementInfo & info, const Point<3,T> xi, MatrixFixWidth<3,T> dshapes) const; + template + bool EvaluateMapping (ElementInfo & info, const Point<3,T> xi, Point<3,T> & x, Mat<3,3,T> & jac) const; class SurfaceElementInfo { From 416addf076039d1a20c46a60156edefde9d10ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Thu, 7 Jul 2016 17:58:13 +0200 Subject: [PATCH 03/10] instantiation and fixes of SIMD - function --- libsrc/meshing/meshtype.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/libsrc/meshing/meshtype.cpp b/libsrc/meshing/meshtype.cpp index 0b0720a8..a6bbbf11 100644 --- a/libsrc/meshing/meshtype.cpp +++ b/libsrc/meshing/meshtype.cpp @@ -1973,7 +1973,7 @@ namespace netgen { case TET: { - dshape = 0; + dshape = T(0.0); dshape(0,0) = 1; dshape(1,1) = 1; dshape(2,2) = 1; @@ -1984,7 +1984,7 @@ namespace netgen } case PRISM: { - dshape = 0; + dshape = T(0.0); dshape(0,0) = 1-p(2); dshape(0,2) = -p(0); dshape(1,1) = 1-p(2); @@ -2007,22 +2007,29 @@ namespace netgen { int np = GetNP(); double eps = 1e-6; - Vector shaper(np), shapel(np); + ArrayMem mem(2*np); + TFlatVector shaper(np, &mem[0]); + TFlatVector shapel(np, &mem[np]); + // Vector shaper(np), shapel(np); - for (int i = 1; i <= 3; i++) + for (int i = 0; i < 3; i++) { - Point3d pr(p), pl(p); - pr.X(i) += eps; - pl.X(i) -= eps; + Point<3,T> pr(p), pl(p); + pr(i) += eps; + pl(i) -= eps; GetShapeNew (pr, shaper); GetShapeNew (pl, shapel); for (int j = 0; j < np; j++) - dshape(j, i-1) = (shaper(j) - shapel(j)) / (2 * eps); + dshape(j, i) = (shaper(j) - shapel(j)) / (2 * eps); } } } } + + template void Element::GetDShapeNew (const Point<3> &, MatrixFixWidth<3> &) const; + template void Element::GetDShapeNew> (const Point<3,SIMD> &, MatrixFixWidth<3,SIMD> &) const; + void Element :: GetPointMatrix (const T_POINTS & points, From 86fdb04687372dc7b5f6e5d7d638a0825863f84e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Thu, 7 Jul 2016 18:31:09 +0200 Subject: [PATCH 04/10] templated and instantiated CalcShape --- libsrc/linalg/vector.hpp | 86 +++++++++++++++-------------- libsrc/meshing/curvedelems.cpp | 2 +- libsrc/meshing/meshtype.cpp | 23 ++++---- libsrc/meshing/meshtype.hpp | 2 +- libsrc/visualization/vssolution.cpp | 4 +- 5 files changed, 63 insertions(+), 54 deletions(-) diff --git a/libsrc/linalg/vector.hpp b/libsrc/linalg/vector.hpp index 1b103d0e..dec8b57d 100644 --- a/libsrc/linalg/vector.hpp +++ b/libsrc/linalg/vector.hpp @@ -8,6 +8,50 @@ /* *************************************************************************/ + + +template +class TFlatVector +{ +protected: + int s; + T * data; +public: + TFlatVector () { ; } + TFlatVector (int as, T * adata) + { s = as; data = adata; } + + int Size () const + { return s; } + + TFlatVector & operator= (const TFlatVector & v) + { memcpy (data, v.data, s*sizeof(T)); return *this; } + + TFlatVector & operator= (T scal) + { + for (int i = 0; i < s; i++) data[i] = scal; + return *this; + } + + T & operator[] (int i) { return data[i]; } + const T & operator[] (int i) const { return data[i]; } + T & operator() (int i) { return data[i]; } + const T & operator() (int i) const { return data[i]; } + + // double & Elem (int i) { return data[i-1]; } + // const double & Get (int i) const { return data[i-1]; } + // void Set (int i, double val) { data[i-1] = val; } + + TFlatVector & operator*= (T scal) + { + for (int i = 0; i < s; i++) data[i] *= scal; + return *this; + } +}; + + + + class FlatVector { protected: @@ -75,50 +119,11 @@ public: return sqrt (sum); } + operator TFlatVector () const { return TFlatVector (s, data); } friend double operator* (const FlatVector & v1, const FlatVector & v2); }; -template -class TFlatVector -{ -protected: - int s; - T * data; -public: - TFlatVector () { ; } - TFlatVector (int as, T * adata) - { s = as; data = adata; } - - int Size () const - { return s; } - - TFlatVector & operator= (const TFlatVector & v) - { memcpy (data, v.data, s*sizeof(T)); return *this; } - - TFlatVector & operator= (T scal) - { - for (int i = 0; i < s; i++) data[i] = scal; - return *this; - } - - T & operator[] (int i) { return data[i]; } - const T & operator[] (int i) const { return data[i]; } - T & operator() (int i) { return data[i]; } - const T & operator() (int i) const { return data[i]; } - - // double & Elem (int i) { return data[i-1]; } - // const double & Get (int i) const { return data[i-1]; } - // void Set (int i, double val) { data[i-1] = val; } - - TFlatVector & operator*= (T scal) - { - for (int i = 0; i < s; i++) data[i] *= scal; - return *this; - } -}; - - class Vector : public FlatVector @@ -154,6 +159,7 @@ public: } } + operator TFlatVector () const { return TFlatVector (s, data); } }; template diff --git a/libsrc/meshing/curvedelems.cpp b/libsrc/meshing/curvedelems.cpp index f0763cdd..2e33e50d 100644 --- a/libsrc/meshing/curvedelems.cpp +++ b/libsrc/meshing/curvedelems.cpp @@ -2313,7 +2313,7 @@ namespace netgen double lami[8]; FlatVector vlami(8, lami); vlami = 0; - mesh[elnr].GetShapeNew (xi, vlami); + mesh[elnr].GetShapeNew (xi, vlami); Mat<3,3> trans, dxdxic; if (dxdxi) diff --git a/libsrc/meshing/meshtype.cpp b/libsrc/meshing/meshtype.cpp index a6bbbf11..14048e9f 100644 --- a/libsrc/meshing/meshtype.cpp +++ b/libsrc/meshing/meshtype.cpp @@ -1850,8 +1850,8 @@ namespace netgen } - - void Element :: GetShapeNew (const Point<3> & p, FlatVector & shape) const + template + void Element :: GetShapeNew (const Point<3,T> & p, TFlatVector shape) const { /* if (shape.Size() < GetNP()) @@ -1874,10 +1874,10 @@ namespace netgen case TET10: { - double lam1 = p(0); - double lam2 = p(1); - double lam3 = p(2); - double lam4 = 1-p(0)-p(1)-p(2); + T lam1 = p(0); + T lam2 = p(1); + T lam3 = p(2); + T lam4 = 1-p(0)-p(1)-p(2); shape(0) = 2 * lam1 * (lam1-0.5); shape(1) = 2 * lam2 * (lam2-0.5); @@ -1897,11 +1897,12 @@ namespace netgen case PYRAMID: { - double noz = 1-p(2); - if (noz == 0.0) noz = 1e-10; + T noz = 1-p(2); + // if (noz == 0.0) noz = 1e-10; + noz += T(1e-12); - double xi = p(0) / noz; - double eta = p(1) / noz; + T xi = p(0) / noz; + T eta = p(1) / noz; shape(0) = (1-xi)*(1-eta) * (noz); shape(1) = ( xi)*(1-eta) * (noz); shape(2) = ( xi)*( eta) * (noz); @@ -2026,6 +2027,8 @@ namespace netgen } } } + template void Element :: GetShapeNew (const Point<3,double> & p, TFlatVector shape) const; + template void Element :: GetShapeNew (const Point<3,SIMD> & p, TFlatVector> shape) const; template void Element::GetDShapeNew (const Point<3> &, MatrixFixWidth<3> &) const; template void Element::GetDShapeNew> (const Point<3,SIMD> &, MatrixFixWidth<3,SIMD> &) const; diff --git a/libsrc/meshing/meshtype.hpp b/libsrc/meshing/meshtype.hpp index 19056e21..8e1be87d 100644 --- a/libsrc/meshing/meshtype.hpp +++ b/libsrc/meshing/meshtype.hpp @@ -716,7 +716,7 @@ namespace netgen class DenseMatrix & trans) const; void GetShape (const Point<3> & p, class Vector & shape) const; - void GetShapeNew (const Point<3> & p, class FlatVector & shape) const; + // void GetShapeNew (const Point<3> & p, class FlatVector & shape) const; template void GetShapeNew (const Point<3,T> & p, TFlatVector shape) const; /// matrix 2 * np diff --git a/libsrc/visualization/vssolution.cpp b/libsrc/visualization/vssolution.cpp index 8b7371bb..fbe8d732 100644 --- a/libsrc/visualization/vssolution.cpp +++ b/libsrc/visualization/vssolution.cpp @@ -1846,7 +1846,7 @@ namespace netgen for (int i = 0; i < cnt_valid; i++) { - el.GetShapeNew (locgrid[i], shape); + el.GetShapeNew (locgrid[i], shape); Point<3> pglob; for (int j = 0; j < 3; j++) { @@ -3913,7 +3913,7 @@ namespace netgen for (int i = 0; i < cnt_valid; i++) { - el.GetShapeNew (locgrid[i], shape); + el.GetShapeNew (locgrid[i], shape); Point<3> pglob; for (int j = 0; j < 3; j++) { From 59bc017c40afcfa06476fc1d81560cc3e322f608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Thu, 7 Jul 2016 19:49:02 +0200 Subject: [PATCH 05/10] bring std-operators to namespace --- libsrc/general/mysimd.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libsrc/general/mysimd.hpp b/libsrc/general/mysimd.hpp index ed67dbec..d799f288 100644 --- a/libsrc/general/mysimd.hpp +++ b/libsrc/general/mysimd.hpp @@ -219,6 +219,9 @@ namespace netgen inline SIMD operator*= (SIMD & a, SIMD b) { return a.Data()*=b.Data(); } inline SIMD operator/= (SIMD & a, SIMD b) { return a.Data()/=b.Data(); } + using std::sqrt; + using std::fabs; + inline SIMD sqrt (SIMD a) { return std::sqrt(a.Data()); } inline SIMD fabs (SIMD a) { return std::fabs(a.Data()); } inline SIMD L2Norm2 (SIMD a) { return a.Data()*a.Data(); } From 0a9adc91e937eb2dce6be4d8a85a4d8e895f5b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Thu, 7 Jul 2016 19:54:47 +0200 Subject: [PATCH 06/10] default simd ctr --- libsrc/general/mysimd.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libsrc/general/mysimd.hpp b/libsrc/general/mysimd.hpp index d799f288..37a66c5c 100644 --- a/libsrc/general/mysimd.hpp +++ b/libsrc/general/mysimd.hpp @@ -168,13 +168,19 @@ namespace netgen SIMD () = default; SIMD (const SIMD &) = default; SIMD & operator= (const SIMD &) = default; - + + SIMD (double val) + : data(val) { ; } + + /* template SIMD (const T & val) { // SIMD_function(val, std::is_convertible>()); SIMD_function(val, has_call_operator::value); } + */ + template SIMD & operator= (const T & val) From e1f7a5f5f2c4c82ef21eea910e6bc4df992f0bcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Mon, 11 Jul 2016 14:55:35 +0200 Subject: [PATCH 07/10] low memory operations --- libsrc/meshing/curvedelems.cpp | 69 +++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/libsrc/meshing/curvedelems.cpp b/libsrc/meshing/curvedelems.cpp index 2e33e50d..12a0c5a9 100644 --- a/libsrc/meshing/curvedelems.cpp +++ b/libsrc/meshing/curvedelems.cpp @@ -300,6 +300,32 @@ namespace netgen values[i+1] = p1; } } + + template + void EvaluateScaledLambda (int n, S x, S y, FUNC func) + { + S p1(1.0), p2(0.0), p3; + + if (n >= 0) + { + p2 = 1.0; + func(0, p2); + } + if (n >= 1) + { + p1 = a[0]*y+b[0]*x; + func(1, p1); + } + + for (int i = 1; i < n; i++) + { + p3 = p2; p2=p1; + p1 = (a[i]*y+b[i]*x)*p2-c[i]*y*y*p3; + func(i+1, p1); + } + } + + }; class JacobiRecPol : public RecPol @@ -455,17 +481,32 @@ namespace netgen { if (n < 3) return; - Tx hx[50], hy[50]; - ScaledJacobiPolynomial (n-3, x, t-y, 2, 2, hx); + // Tx hx[50], hy[50]; + // ScaledJacobiPolynomial (n-3, x, t-y, 2, 2, hx); + /* + jacpols2[2]->EvaluateScaled (n-3, x, t-y, hx); int ii = 0; Tx bub = (t+x-y)*y*(t-x-y); for (int ix = 0; ix <= n-3; ix++) { - jacpols2[2*ix+5] -> EvaluateScaled (n-3, 2*y-1, t, hy); - for (int iy = 0; iy <= n-3-ix; iy++) - func(ii++, bub * hx[ix]*hy[iy]); + jacpols2[2*ix+5] -> EvaluateScaledLambda (n-3-ix, 2*y-1, t, [&](int i, Ty valy) + { + func(ii++, bub*hx[ix]*valy); + }); } + */ + int ii = 0; + Tx bub = (t+x-y)*y*(t-x-y); + jacpols2[2]->EvaluateScaledLambda + (n-3, x, t-y, + [&](int ix, Tx valx) + { + jacpols2[2*ix+5] -> EvaluateScaledLambda (n-3-ix, 2*y-1, t, [&](int iy, Ty valy) + { + func(ii++, bub*valx*valy); + }); + }); } @@ -3338,7 +3379,7 @@ namespace netgen */ } - + // extern int mappingvar; template bool CurvedElements :: EvaluateMapping (ElementInfo & info, Point<3,T> xi, Point<3,T> & mx, Mat<3,3,T> & jac) const @@ -3434,7 +3475,7 @@ namespace netgen } if (info.order == 1) break; - + AutoDiff<3,T> mu[8] = { (1-x)+(1-y)+(1-z), x +(1-y)+(1-z), @@ -3445,7 +3486,6 @@ namespace netgen x + y +(z), (1-x)+ y +(z), }; - int ii = 8; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (HEX); @@ -3458,12 +3498,6 @@ namespace netgen int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1; if (el[vi1] > el[vi2]) swap (vi1, vi2); - /* - CalcEdgeShape (eorder, mu[vi1]-mu[vi2], &shapes(ii)); - T lame = shapes(vi1)+shapes(vi2); - for (int j = 0; j < order-1; j++) - shapes(ii+j) *= lame; - */ AutoDiff<3,T> lame = lami[vi1]+lami[vi2]; CalcEdgeShapeLambda (eorder, mu[vi1]-mu[vi2], [&](int i, AutoDiff<3,T> shape) @@ -3475,7 +3509,6 @@ namespace netgen } } - break; } @@ -4031,7 +4064,7 @@ namespace netgen } - + // extern int multipointtrafovar; template void CurvedElements :: CalcMultiPointElementTransformation (ElementIndex elnr, int n, @@ -4039,6 +4072,7 @@ namespace netgen T * x, size_t sx, T * dxdxi, size_t sdxdxi) { + // multipointtrafovar++; static int timer = NgProfiler::CreateTimer ("calcmultipointelementtrafo"); static int timer1 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 1"); static int timer2 = NgProfiler::CreateTimer ("calcmultipointelementtrafo 2"); @@ -4156,7 +4190,7 @@ namespace netgen // NgProfiler::StopTimer (timer2); // NgProfiler::StartTimer (timer3); - /* + bool ok = true; for (int i = 0; i < n; i++) { @@ -4175,7 +4209,6 @@ namespace netgen dxdxi[i*sdxdxi+3*j+k] = _dxdxi(j,k); } if (ok) return; - */ ArrayMem,100> coefs(info.ndof); ArrayMem shapes_mem(info.ndof); From 423c4c6ac5e30e052d05c4f555ef60ad8fcefd57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Mon, 11 Jul 2016 18:27:44 +0200 Subject: [PATCH 08/10] simd-mapping also for 2D --- libsrc/gprim/geomops.hpp | 15 +- libsrc/interface/nginterface_v2.cpp | 7 + libsrc/meshing/curvedelems.cpp | 280 +++++++++++++++++++--------- libsrc/meshing/curvedelems.hpp | 18 +- libsrc/meshing/meshtype.cpp | 11 +- libsrc/meshing/meshtype.hpp | 6 +- 6 files changed, 229 insertions(+), 108 deletions(-) diff --git a/libsrc/gprim/geomops.hpp b/libsrc/gprim/geomops.hpp index 4648e685..d08eef30 100644 --- a/libsrc/gprim/geomops.hpp +++ b/libsrc/gprim/geomops.hpp @@ -247,13 +247,14 @@ namespace netgen } */ - inline Mat<2,2> operator* (const Mat<2,2> & a, const Mat<2,2> & b) + template + inline Mat<2,2,T> operator* (const Mat<2,2,T> & a, const Mat<2,2,T> & b) { - Mat<2,2> m; + Mat<2,2,T> m; for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) { - double sum = 0; + T sum(0); for (int k = 0; k < 2; k++) sum += a(i,k) * b(k, j); m(i,j) = sum; @@ -275,14 +276,14 @@ namespace netgen return m; } - - inline Mat<3,2> operator* (const Mat<3,2> & a, const Mat<2,2> & b) + template + inline Mat<3,2,T> operator* (const Mat<3,2,T> & a, const Mat<2,2,T> & b) { - Mat<3,2> m; + Mat<3,2,T> m; for (int i = 0; i < 3; i++) for (int j = 0; j < 2; j++) { - double sum = 0; + T sum(0.0); for (int k = 0; k < 2; k++) sum += a(i,k) * b(k, j); m(i,j) = sum; diff --git a/libsrc/interface/nginterface_v2.cpp b/libsrc/interface/nginterface_v2.cpp index 1b9b3eee..4dc80ebc 100644 --- a/libsrc/interface/nginterface_v2.cpp +++ b/libsrc/interface/nginterface_v2.cpp @@ -617,6 +617,12 @@ namespace netgen __m256d * x, size_t sx, __m256d * dxdxi, size_t sdxdxi) const { + mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<2> + (elnr, npts, + reinterpret_cast*> (xi), sxi, + reinterpret_cast*> (x), sx, + reinterpret_cast*> (dxdxi), sdxdxi); + /* for (int i = 0; i < npts; i++) { double hxi[4][2]; @@ -637,6 +643,7 @@ namespace netgen x += sx; dxdxi += sdxdxi; } + */ } template<> DLL_HEADER void Ngx_Mesh :: diff --git a/libsrc/meshing/curvedelems.cpp b/libsrc/meshing/curvedelems.cpp index 12a0c5a9..abd3fd9f 100644 --- a/libsrc/meshing/curvedelems.cpp +++ b/libsrc/meshing/curvedelems.cpp @@ -480,22 +480,6 @@ namespace netgen static void CalcScaledTrigShapeLambda (int n, Tx x, Ty y, Tt t, FUNC func) { if (n < 3) return; - - // Tx hx[50], hy[50]; - // ScaledJacobiPolynomial (n-3, x, t-y, 2, 2, hx); - - /* - jacpols2[2]->EvaluateScaled (n-3, x, t-y, hx); - int ii = 0; - Tx bub = (t+x-y)*y*(t-x-y); - for (int ix = 0; ix <= n-3; ix++) - { - jacpols2[2*ix+5] -> EvaluateScaledLambda (n-3-ix, 2*y-1, t, [&](int i, Ty valy) - { - func(ii++, bub*hx[ix]*valy); - }); - } - */ int ii = 0; Tx bub = (t+x-y)*y*(t-x-y); jacpols2[2]->EvaluateScaledLambda @@ -1758,7 +1742,7 @@ namespace netgen ArrayMem,100> coefs(info.ndof); ArrayMem shapes_mem(info.ndof); - Vector shapes(info.ndof, &shapes_mem[0]); + TFlatVector shapes(info.ndof, &shapes_mem[0]); ArrayMem dshapes_mem(2*info.ndof); MatrixFixWidth<2> dshapes(info.ndof, &dshapes_mem[0]); @@ -1787,25 +1771,25 @@ namespace netgen - + template void CurvedElements :: - CalcElementShapes (SurfaceElementInfo & info, const Point<2> & xi, Vector & shapes) const + CalcElementShapes (SurfaceElementInfo & info, const Point<2,T> xi, TFlatVector shapes) const { const Element2d & el = mesh[info.elnr]; - shapes.SetSize(info.ndof); - + // shapes.SetSize(info.ndof); + if (rational && info.order >= 2) { - shapes.SetSize(6); - double w = 1; - double lami[3] = { xi(0), xi(1), 1-xi(0)-xi(1) }; + // shapes.SetSize(6); + T w(1); + T lami[3] = { xi(0), xi(1), 1-xi(0)-xi(1) }; for (int j = 0; j < 3; j++) shapes(j) = lami[j] * lami[j]; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TRIG); for (int j = 0; j < 3; j++) { - double wi = edgeweight[info.edgenrs[j]]; + T wi = edgeweight[info.edgenrs[j]]; shapes(j+3) = 2 * wi * lami[edges[j][0]-1] * lami[edges[j][1]-1]; w += (wi-1) * 2 * lami[edges[j][0]-1] * lami[edges[j][1]-1]; } @@ -1865,9 +1849,9 @@ namespace netgen } else { - double x = xi(0); - double y = xi(1); - double lam3 = 1-x-y; + T x = xi(0); + T y = xi(1); + T lam3 = 1-x-y; shapes(0) = x * (2*x-1); shapes(1) = y * (2*y-1); @@ -1888,7 +1872,7 @@ namespace netgen if (info.order == 1) return; - double mu[4] = { + T mu[4] = { 1 - xi(0) + 1 - xi(1), xi(0) + 1 - xi(1), xi(0) + xi(1), @@ -1907,7 +1891,7 @@ namespace netgen if (el[vi1] > el[vi2]) swap (vi1, vi2); CalcEdgeShape (eorder, mu[vi1]-mu[vi2], &shapes(ii)); - double lame = shapes(vi1)+shapes(vi2); + T lame = shapes(vi1)+shapes(vi2); for (int j = 0; j < order-1; j++) shapes(ii+j) *= lame; ii += eorder-1; @@ -1925,14 +1909,14 @@ namespace netgen }; } - + template void CurvedElements :: - CalcElementDShapes (SurfaceElementInfo & info, const Point<2> & xi, MatrixFixWidth<2> & dshapes) const + CalcElementDShapes (SurfaceElementInfo & info, const Point<2,T> xi, MatrixFixWidth<2,T> dshapes) const { const Element2d & el = mesh[info.elnr]; ELEMENT_TYPE type = el.GetType(); - double lami[4]; + T lami[4]; dshapes.SetSize(info.ndof); // dshapes = 0; @@ -1941,13 +1925,13 @@ namespace netgen if (rational && info.order >= 2) { - double w = 1; - double dw[2] = { 0, 0 }; + T w = 1; + T dw[2] = { 0, 0 }; lami[0] = xi(0); lami[1] = xi(1); lami[2] = 1-xi(0)-xi(1); - double dlami[3][2] = { { 1, 0 }, { 0, 1 }, { -1, -1 }}; - double shapes[6]; + T dlami[3][2] = { { 1, 0 }, { 0, 1 }, { -1, -1 }}; + T shapes[6]; for (int j = 0; j < 3; j++) { @@ -1959,7 +1943,7 @@ namespace netgen const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TRIG); for (int j = 0; j < 3; j++) { - double wi = edgeweight[info.edgenrs[j]]; + T wi = edgeweight[info.edgenrs[j]]; shapes[j+3] = 2 * wi * lami[edges[j][0]-1] * lami[edges[j][1]-1]; for (int k = 0; k < 2; k++) @@ -2016,7 +2000,7 @@ namespace netgen CalcScaledEdgeShapeDxDt<2> (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], &dshapes(ii,0)); - Mat<2,2> trans; + Mat<2,2,T> trans; for (int j = 0; j < 2; j++) { trans(0,j) = dshapes(vi1,j)-dshapes(vi2,j); @@ -2025,8 +2009,8 @@ namespace netgen for (int j = 0; j < eorder-1; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); } @@ -2049,7 +2033,7 @@ namespace netgen 1-lami[fnums[1]]-lami[fnums[0]], &dshapes(ii,0)); int nd = (forder-1)*(forder-2)/2; - Mat<2,2> trans; + Mat<2,2,T> trans; for (int j = 0; j < 2; j++) { trans(0,j) = dshapes(fnums[1],j)-dshapes(fnums[0],j); @@ -2058,8 +2042,8 @@ namespace netgen for (int j = 0; j < nd; j++) { - double ddx = dshapes(ii+j,0); - double ddt = dshapes(ii+j,1); + T ddx = dshapes(ii+j,0); + T ddt = dshapes(ii+j,1); dshapes(ii+j,0) = ddx * trans(0,0) + ddt * trans(1,0); dshapes(ii+j,1) = ddx * trans(0,1) + ddt * trans(1,1); } @@ -2072,7 +2056,7 @@ namespace netgen { if (dshapes.Height() == 3) { - dshapes = 0.0; + dshapes = T(0.0); dshapes(0,0) = 1; dshapes(1,1) = 1; dshapes(2,0) = -1; @@ -2080,10 +2064,10 @@ namespace netgen } else { - AutoDiff<2> x(xi(0), 0); - AutoDiff<2> y(xi(1), 1); - AutoDiff<2> lam3 = 1-x-y; - AutoDiff<2> shapes[6]; + AutoDiff<2,T> x(xi(0), 0); + AutoDiff<2,T> y(xi(1), 1); + AutoDiff<2,T> lam3 = 1-x-y; + AutoDiff<2,T> shapes[6]; shapes[0] = x * (2*x-1); shapes[1] = y * (2*y-1); shapes[2] = lam3 * (2*lam3-1); @@ -2114,28 +2098,28 @@ namespace netgen if (info.order == 1) return; - double shapes[4] = { + T shapes[4] = { (1-xi(0))*(1-xi(1)), xi(0) *(1-xi(1)), xi(0) * xi(1) , (1-xi(0))* xi(1) }; - double mu[4] = { + T mu[4] = { 1 - xi(0) + 1 - xi(1), xi(0) + 1 - xi(1), xi(0) + xi(1), 1 - xi(0) + xi(1), }; - double dmu[4][2] = { + T dmu[4][2] = { { -1, -1 }, { 1, -1 }, { 1, 1 }, { -1, 1 } }; // double hshapes[20], hdshapes[20]; - ArrayMem hshapes(order+1), hdshapes(order+1); + ArrayMem hshapes(order+1), hdshapes(order+1); int ii = 4; const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (QUAD); @@ -2150,8 +2134,8 @@ namespace netgen CalcEdgeShapeDx (eorder, mu[vi1]-mu[vi2], &hshapes[0], &hdshapes[0]); - double lame = shapes[vi1]+shapes[vi2]; - double dlame[2] = { + T lame = shapes[vi1]+shapes[vi2]; + T dlame[2] = { dshapes(vi1, 0) + dshapes(vi2, 0), dshapes(vi1, 1) + dshapes(vi2, 1) }; @@ -2190,6 +2174,85 @@ namespace netgen }; } + template + bool CurvedElements :: + EvaluateMapping (SurfaceElementInfo & info, const Point<2,T> xi, Point & mx, Mat & jac) const + { + const Element2d & el = mesh[info.elnr]; + if (rational && info.order >= 2) return false; // not supported + + AutoDiff<2,T> x(xi(0), 0); + AutoDiff<2,T> y(xi(1), 1); + + AutoDiff<2,T> mapped_x[DIM_SPACE]; + for (int i = 0; i < DIM_SPACE; i++) + mapped_x[i] = AutoDiff<2,T>(0.0); + + switch (el.GetType()) + { + case TRIG: + { + // if (info.order >= 2) return false; // not yet supported + AutoDiff<2,T> lami[4] = { x, y, 1-x-y }; + for (int j = 0; j < 3; j++) + { + Point<3> p = mesh[el[j]]; + for (int k = 0; k < DIM_SPACE; k++) + mapped_x[k] += p(k) * lami[j]; + } + + const ELEMENT_EDGE * edges = MeshTopology::GetEdges1 (TRIG); + for (int i = 0; i < 3; i++) + { + int eorder = edgeorder[info.edgenrs[i]]; + if (eorder >= 2) + { + int first = edgecoeffsindex[info.edgenrs[i]]; + + int vi1 = edges[i][0]-1, vi2 = edges[i][1]-1; + if (el[vi1] > el[vi2]) swap (vi1, vi2); + + CalcScaledEdgeShapeLambda (eorder, lami[vi1]-lami[vi2], lami[vi1]+lami[vi2], + [&](int i, AutoDiff<2,T> shape) + { + for (int k = 0; k < DIM_SPACE; k++) + mapped_x[k] += edgecoeffs[first+i](k) * shape; + }); + } + } + + int forder = faceorder[info.facenr]; + if (forder >= 3) + { + int first = facecoeffsindex[info.facenr]; + + int fnums[] = { 0, 1, 2 }; + if (el[fnums[0]] > el[fnums[1]]) swap (fnums[0], fnums[1]); + if (el[fnums[1]] > el[fnums[2]]) swap (fnums[1], fnums[2]); + if (el[fnums[0]] > el[fnums[1]]) swap (fnums[0], fnums[1]); + + CalcScaledTrigShapeLambda (forder, + lami[fnums[1]]-lami[fnums[0]], lami[fnums[2]], AutoDiff<2,T>(1.0), + [&](int i, AutoDiff<2,T> shape) + { + for (int k = 0; k < DIM_SPACE; k++) + mapped_x[k] += facecoeffs[first+i](k) * shape; + }); + } + break; + } + default: + return false; + } + + for (int i = 0; i < DIM_SPACE; i++) + { + mx(i) = mapped_x[i].Value(); + for (int j = 0; j < 2; j++) + jac(i,j) = mapped_x[i].DValue(j); + } + return true; + } template void CurvedElements :: @@ -3516,8 +3579,6 @@ namespace netgen return false; } - // for (int i = 0; i < 3; i++) - // cout << mapped_x[i] << endl; for (int i = 0; i < 3; i++) { mx(i) = mapped_x[i].Value(); @@ -3654,12 +3715,12 @@ namespace netgen - template + template void CurvedElements :: CalcMultiPointSurfaceTransformation (SurfaceElementIndex elnr, int npts, - const double * xi, size_t sxi, - double * x, size_t sx, - double * dxdxi, size_t sdxdxi) + const T * xi, size_t sxi, + T * x, size_t sx, + T * dxdxi, size_t sdxdxi) { if (mesh.coarsemesh) { @@ -3667,18 +3728,18 @@ namespace netgen (*mesh.hpelements) [mesh[elnr].hp_elnr]; // xi umrechnen - double lami[4]; - FlatVector vlami(4, lami); + T lami[4]; + TFlatVector vlami(4, lami); - ArrayMem, 50> coarse_xi (npts); + ArrayMem, 50> coarse_xi (npts); for (int pi = 0; pi < npts; pi++) { vlami = 0; - Point<2> hxi(xi[pi*sxi], xi[pi*sxi+1]); + Point<2,T> hxi(xi[pi*sxi], xi[pi*sxi+1]); mesh[elnr].GetShapeNew ( hxi, vlami); - Point<2> cxi(0,0); + Point<2,T> cxi(0,0); for (int i = 0; i < hpref_el.np; i++) for (int j = 0; j < 2; j++) cxi(j) += hpref_el.param[i][j] * lami[i]; @@ -3687,29 +3748,29 @@ namespace netgen } mesh.coarsemesh->GetCurvedElements(). - CalcMultiPointSurfaceTransformation (hpref_el.coarse_elnr, npts, - &coarse_xi[0](0), &coarse_xi[1](0)-&coarse_xi[0](0), - x, sx, dxdxi, sdxdxi); + CalcMultiPointSurfaceTransformation (hpref_el.coarse_elnr, npts, + &coarse_xi[0](0), &coarse_xi[1](0)-&coarse_xi[0](0), + x, sx, dxdxi, sdxdxi); // Mat<3,2> dxdxic; if (dxdxi) { - MatrixFixWidth<2> dlami(4); - dlami = 0; + MatrixFixWidth<2,T> dlami(4); + dlami = T(0.0); for (int pi = 0; pi < npts; pi++) { - Point<2> hxi(xi[pi*sxi], xi[pi*sxi+1]); + Point<2,T> hxi(xi[pi*sxi], xi[pi*sxi+1]); mesh[elnr].GetDShapeNew ( hxi, dlami); - Mat<2,2> trans; + Mat<2,2,T> trans; trans = 0; for (int k = 0; k < 2; k++) for (int l = 0; l < 2; l++) for (int i = 0; i < hpref_el.np; i++) trans(l,k) += hpref_el.param[i][l] * dlami(i, k); - Mat hdxdxic, hdxdxi; + Mat hdxdxic, hdxdxi; for (int k = 0; k < 2*DIM_SPACE; k++) hdxdxic(k) = dxdxi[pi*sdxdxi+k]; @@ -3798,16 +3859,38 @@ namespace netgen } } + + + bool ok = true; + for (int i = 0; i < npts; i++) + { + Point<2,T> _xi(xi[i*sxi], xi[i*sxi+1]); + Point _x; + Mat _dxdxi; + if (!EvaluateMapping (info, _xi, _x, _dxdxi)) + { ok = false; break; } + // cout << "x = " << _x << ", dxdxi = " << _dxdxi << endl; + if (x) + for (int j = 0; j < DIM_SPACE; j++) + x[i*sx+j] = _x[j]; + if (dxdxi) + for (int j = 0; j < DIM_SPACE; j++) + for (int k = 0; k < 2; k++) + dxdxi[i*sdxdxi+2*j+k] = _dxdxi(j,k); + } + if (ok) return; + + // THESE LAST LINES ARE COPIED FROM CurvedElements::CalcSurfaceTransformation ArrayMem,100> coefs(info.ndof); GetCoefficients (info, coefs); - ArrayMem shapes_mem(info.ndof); - Vector shapes(info.ndof, &shapes_mem[0]); + ArrayMem shapes_mem(info.ndof); + TFlatVector shapes(info.ndof, &shapes_mem[0]); - // ArrayMem dshapes_mem(info.ndof*2); - MatrixFixWidth<2> dshapes; // (info.ndof,&shapes_mem[0]); + ArrayMem dshapes_mem(info.ndof*2); + MatrixFixWidth<2,T> dshapes(info.ndof,&shapes_mem[0]); @@ -3817,12 +3900,16 @@ namespace netgen { for (int j = 0; j < npts; j++) { - Point<2> vxi(xi[j*sxi], xi[j*sxi+1]); - - Point val (coefs[2]); - val += vxi(0) * (coefs[0]-coefs[2]); - val += vxi(1) * (coefs[1]-coefs[2]); + Point<2,T> vxi(xi[j*sxi], xi[j*sxi+1]); + Point val; + for (int k = 0; k < DIM_SPACE; k++) + val(k) = coefs[2](k) + (coefs[0](k)-coefs[2](k)) * vxi(0) + (coefs[1](k)-coefs[2](k)) * vxi(1); + /* + (coefs[2]); + val += (coefs[0]-coefs[2]) * vxi(0); + val += (coefs[1]-coefs[2]) * vxi(1); + */ for (int k = 0; k < DIM_SPACE; k++) x[j*sx+k] = val(k); } @@ -3830,12 +3917,13 @@ namespace netgen else for (int j = 0; j < npts; j++) { - Point<2> vxi(xi[j*sxi], xi[j*sxi+1]); + Point<2,T> vxi(xi[j*sxi], xi[j*sxi+1]); CalcElementShapes (info, vxi, shapes); - Point val = 0.0; + Point val = T(0.0); for (int i = 0; i < coefs.Size(); i++) - val += shapes(i) * coefs[i]; + for (int k = 0; k < DIM_SPACE; k++) + val(k) += shapes(i) * coefs[i](k); for (int k = 0; k < DIM_SPACE; k++) x[j*sx+k] = val(k); @@ -3846,10 +3934,10 @@ namespace netgen { if (info.order == 1 && type == TRIG) { - Point<2> xij(xi[0], xi[1]); + Point<2,T> xij(xi[0], xi[1]); CalcElementDShapes (info, xij, dshapes); - Mat<3,2> dxdxij; + Mat<3,2,T> dxdxij; dxdxij = 0.0; for (int i = 0; i < coefs.Size(); i++) for (int j = 0; j < DIM_SPACE; j++) @@ -3866,10 +3954,10 @@ namespace netgen { for (int j = 0; j < npts; j++) { - Point<2> vxi(xi[j*sxi], xi[j*sxi+1]); + Point<2,T> vxi(xi[j*sxi], xi[j*sxi+1]); CalcElementDShapes (info, vxi, dshapes); - Mat ds; + Mat ds; ds = 0.0; for (int i = 0; i < coefs.Size(); i++) for (int j = 0; j < DIM_SPACE; j++) @@ -3899,6 +3987,18 @@ namespace netgen double * dxdxi, size_t sdxdxi); + template void CurvedElements :: + CalcMultiPointSurfaceTransformation<2> (SurfaceElementIndex elnr, int npts, + const SIMD * xi, size_t sxi, + SIMD * x, size_t sx, + SIMD * dxdxi, size_t sdxdxi); + + template void CurvedElements :: + CalcMultiPointSurfaceTransformation<3> (SurfaceElementIndex elnr, int npts, + const SIMD * xi, size_t sxi, + SIMD * x, size_t sx, + SIMD * dxdxi, size_t sdxdxi); + diff --git a/libsrc/meshing/curvedelems.hpp b/libsrc/meshing/curvedelems.hpp index b10c5a58..90ae8d40 100644 --- a/libsrc/meshing/curvedelems.hpp +++ b/libsrc/meshing/curvedelems.hpp @@ -126,16 +126,15 @@ public: double * x, size_t sx, double * dxdxi, size_t sdxdxi); - void CalcMultiPointSurfaceTransformation (Array< Point<2> > * xi, SurfaceElementIndex elnr, Array< Point<3> > * x, Array< Mat<3,2> > * dxdxi); - template + template void CalcMultiPointSurfaceTransformation (SurfaceElementIndex elnr, int n, - const double * xi, size_t sxi, - double * x, size_t sx, - double * dxdxi, size_t sdxdxi); + const T * xi, size_t sxi, + T * x, size_t sx, + T * dxdxi, size_t sdxdxi); void CalcMultiPointElementTransformation (Array< Point<3> > * xi, ElementIndex elnr, Array< Point<3> > * x, @@ -217,10 +216,15 @@ private: int facenr; }; - void CalcElementShapes (SurfaceElementInfo & elinfo, const Point<2> & xi, Vector & shapes) const; + template + void CalcElementShapes (SurfaceElementInfo & elinfo, const Point<2,T> xi, TFlatVector shapes) const; template void GetCoefficients (SurfaceElementInfo & elinfo, Array > & coefs) const; - void CalcElementDShapes (SurfaceElementInfo & elinfo, const Point<2> & xi, MatrixFixWidth<2> & dshapes) const; + template + void CalcElementDShapes (SurfaceElementInfo & elinfo, const Point<2,T> xi, MatrixFixWidth<2,T> dshapes) const; + + template + bool EvaluateMapping (SurfaceElementInfo & info, const Point<2,T> xi, Point & x, Mat & jac) const; }; diff --git a/libsrc/meshing/meshtype.cpp b/libsrc/meshing/meshtype.cpp index 14048e9f..b5ef6f40 100644 --- a/libsrc/meshing/meshtype.cpp +++ b/libsrc/meshing/meshtype.cpp @@ -587,15 +587,15 @@ namespace netgen } - + template void Element2d :: - GetDShapeNew (const Point<2> & p, MatrixFixWidth<2> & dshape) const + GetDShapeNew (const Point<2,T> & p, MatrixFixWidth<2,T> & dshape) const { switch (typ) { case TRIG: { - dshape = 0; + dshape = T(0.0); dshape(0,0) = 1; dshape(1,1) = 1; dshape(2,0) = -1; @@ -2027,6 +2027,11 @@ namespace netgen } } } + + template void Element2d::GetDShapeNew (const Point<2> &, MatrixFixWidth<2> &) const; + template void Element2d::GetDShapeNew> (const Point<2,SIMD> &, MatrixFixWidth<2,SIMD> &) const; + + template void Element :: GetShapeNew (const Point<3,double> & p, TFlatVector shape) const; template void Element :: GetShapeNew (const Point<3,SIMD> & p, TFlatVector> shape) const; diff --git a/libsrc/meshing/meshtype.hpp b/libsrc/meshing/meshtype.hpp index 8e1be87d..866d0a30 100644 --- a/libsrc/meshing/meshtype.hpp +++ b/libsrc/meshing/meshtype.hpp @@ -447,9 +447,13 @@ namespace netgen void GetShape (const Point2d & p, class Vector & shape) const; void GetShapeNew (const Point<2> & p, class FlatVector & shape) const; + template + void GetShapeNew (const Point<2,T> & p, TFlatVector shape) const; /// matrix 2 * np void GetDShape (const Point2d & p, class DenseMatrix & dshape) const; - void GetDShapeNew (const Point<2> & p, class MatrixFixWidth<2> & dshape) const; + template + void GetDShapeNew (const Point<2,T> & p, class MatrixFixWidth<2,T> & dshape) const; + /// matrix 2 * np void GetPointMatrix (const Array & points, class DenseMatrix & pmat) const; From 5681d9c00a254a9d9c53fe655a4c742822e62fe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Mon, 11 Jul 2016 18:39:10 +0200 Subject: [PATCH 09/10] added missing template instantiation --- libsrc/meshing/meshtype.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/libsrc/meshing/meshtype.cpp b/libsrc/meshing/meshtype.cpp index b5ef6f40..2dadf981 100644 --- a/libsrc/meshing/meshtype.cpp +++ b/libsrc/meshing/meshtype.cpp @@ -541,6 +541,30 @@ namespace netgen } } + template + void Element2d :: GetShapeNew (const Point<2,T> & p, TFlatVector shape) const + { + switch (typ) + { + case TRIG: + { + shape(0) = p(0); + shape(1) = p(1); + shape(2) = 1-p(0)-p(1); + break; + } + + case QUAD: + { + shape(0) = (1-p(0))*(1-p(1)); + shape(1) = p(0) *(1-p(1)); + shape(2) = p(0) * p(1) ; + shape(3) = (1-p(0))* p(1) ; + break; + } + } + } + @@ -2028,6 +2052,9 @@ namespace netgen } } + template void Element2d :: GetShapeNew (const Point<2,double> & p, TFlatVector shape) const; + template void Element2d :: GetShapeNew (const Point<2,SIMD> & p, TFlatVector> shape) const; + template void Element2d::GetDShapeNew (const Point<2> &, MatrixFixWidth<2> &) const; template void Element2d::GetDShapeNew> (const Point<2,SIMD> &, MatrixFixWidth<2,SIMD> &) const; From 78297720b5a77d35bdb070536fb6af0fddcda708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Tue, 12 Jul 2016 12:45:21 +0200 Subject: [PATCH 10/10] surfacetrafo simded --- libsrc/interface/nginterface_v2.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libsrc/interface/nginterface_v2.cpp b/libsrc/interface/nginterface_v2.cpp index 4dc80ebc..db3347fa 100644 --- a/libsrc/interface/nginterface_v2.cpp +++ b/libsrc/interface/nginterface_v2.cpp @@ -725,6 +725,12 @@ namespace netgen __m256d * x, size_t sx, __m256d * dxdxi, size_t sdxdxi) const { + mesh->GetCurvedElements().CalcMultiPointSurfaceTransformation<3> + (elnr, npts, + reinterpret_cast*> (xi), sxi, + reinterpret_cast*> (x), sx, + reinterpret_cast*> (dxdxi), sdxdxi); + /* for (int i = 0; i < npts; i++) { double hxi[4][2]; @@ -745,6 +751,7 @@ namespace netgen x += sx; dxdxi += sdxdxi; } + */ } #endif