From 3a631f10cae42032c9e125995bda254beea2d4bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Sun, 30 Oct 2016 15:01:52 +0100 Subject: [PATCH] solution visualization using AVX --- libsrc/csg/python_csg.cpp | 19 +++++- libsrc/general/mysimd.hpp | 12 +++- libsrc/gprim/geomfuncs.hpp | 5 +- libsrc/gprim/geomobjects.hpp | 25 +++---- libsrc/gprim/geomops.hpp | 18 ++--- libsrc/gprim/transform3d.hpp | 3 + libsrc/visualization/soldata.hpp | 12 ++++ libsrc/visualization/vssolution.cpp | 100 +++++++++++++++++++++++++++- 8 files changed, 166 insertions(+), 28 deletions(-) diff --git a/libsrc/csg/python_csg.cpp b/libsrc/csg/python_csg.cpp index 487ab002..d8c65906 100644 --- a/libsrc/csg/python_csg.cpp +++ b/libsrc/csg/python_csg.cpp @@ -167,6 +167,9 @@ namespace netgen extern CSGeometry * ParseCSG (istream & istr); } + +static Transformation<3> global_trafo(Vec<3> (0,0,0)); + DLL_HEADER void ExportCSG() { ModuleScope module("csg"); @@ -186,10 +189,22 @@ DLL_HEADER void ExportCSG() ; bp::def ("Pnt", FunctionPointer - ([](double x, double y, double z) { return Point<3>(x,y,z); })); + ([](double x, double y, double z) { return global_trafo(Point<3>(x,y,z)); })); bp::def ("Pnt", FunctionPointer ([](double x, double y) { return Point<2>(x,y); })); + bp::def ("SetTransformation", FunctionPointer + ([](int dir, double angle) + { + if (dir > 0) + global_trafo.SetAxisRotation (dir, angle*M_PI/180); + else + global_trafo = Transformation<3> (Vec<3>(0,0,0)); + }), + (bp::arg("dir")=int(0), bp::arg("angle")=0)); + + + bp::class_> ("Vec2d", bp::init()) .def ("__str__", &ToString>) .def(bp::self+bp::self) @@ -209,7 +224,7 @@ DLL_HEADER void ExportCSG() ; bp::def ("Vec", FunctionPointer - ([] (double x, double y, double z) { return Vec<3>(x,y,z); })); + ([] (double x, double y, double z) { return global_trafo(Vec<3>(x,y,z)); })); bp::def ("Vec", FunctionPointer ([] (double x, double y) { return Vec<2>(x,y); })); diff --git a/libsrc/general/mysimd.hpp b/libsrc/general/mysimd.hpp index 621ea5a3..3cf94c5a 100644 --- a/libsrc/general/mysimd.hpp +++ b/libsrc/general/mysimd.hpp @@ -54,8 +54,18 @@ namespace netgen #ifdef __AVX__ + template + class AlignedAlloc + { + public: + void * operator new (size_t s) { return _mm_malloc(s, alignof(T)); } + void * operator new[] (size_t s) { return _mm_malloc(s, alignof(T)); } + void operator delete (void * p) { _mm_free(p); } + void operator delete[] (void * p) { _mm_free(p); } + }; + template<> - class alignas(32) SIMD + class alignas(32) SIMD : public AlignedAlloc> { __m256d data; diff --git a/libsrc/gprim/geomfuncs.hpp b/libsrc/gprim/geomfuncs.hpp index 66cbca81..ea0070df 100644 --- a/libsrc/gprim/geomfuncs.hpp +++ b/libsrc/gprim/geomfuncs.hpp @@ -82,9 +82,10 @@ namespace netgen */ // inline Vec<3> Cross (const Vec<3> & v1, const Vec<3> & v2) - inline Vec<3> Cross (Vec<3> v1, Vec<3> v2) + template + inline Vec<3,T> Cross (Vec<3,T> v1, Vec<3,T> v2) { - return Vec<3> + return Vec<3,T> ( v1(1) * v2(2) - v1(2) * v2(1), v1(2) * v2(0) - v1(0) * v2(2), v1(0) * v2(1) - v1(1) * v2(0) ); diff --git a/libsrc/gprim/geomobjects.hpp b/libsrc/gprim/geomobjects.hpp index d4ca640f..d483f57c 100644 --- a/libsrc/gprim/geomobjects.hpp +++ b/libsrc/gprim/geomobjects.hpp @@ -17,7 +17,7 @@ namespace netgen template - class Point + class Point : public AlignedAlloc> { protected: @@ -39,8 +39,9 @@ namespace netgen Point (T ax, T ay, T az, T au) { x[0] = ax; x[1] = ay; x[2] = az; x[3] = au;} - Point (const Point & p2) - { for (int i = 0; i < D; i++) x[i] = p2.x[i]; } + template + Point (const Point & p2) + { for (int i = 0; i < D; i++) x[i] = p2(i); } explicit Point (const Vec & v) { for (int i = 0; i < D; i++) x[i] = v(i); } @@ -65,7 +66,7 @@ namespace netgen }; template - class Vec + class Vec : public AlignedAlloc> { protected: @@ -90,15 +91,15 @@ namespace netgen Vec (const Vec & p2) { for (int i = 0; i < D; i++) x[i] = p2.x[i]; } - explicit Vec (const Point & p) + explicit Vec (const Point & p) { for (int i = 0; i < D; i++) x[i] = p(i); } - Vec (const Vec & p1, const Vec & p2) + Vec (const Vec & p1, const Vec & p2) { for(int i=0; i & p2) + Vec & operator= (const Vec & p2) { for (int i = 0; i < D; i++) x[i] = p2.x[i]; return *this; @@ -131,12 +132,12 @@ namespace netgen return l; } - const Vec & Normalize () + Vec & Normalize () { T l = Length(); - if (l != 0) - for (int i = 0; i < D; i++) - x[i] /= l; + // if (l != 0) + for (int i = 0; i < D; i++) + x[i] /= (l+1e-40); return *this; } @@ -148,7 +149,7 @@ namespace netgen template - class Mat + class Mat : public AlignedAlloc> { protected: diff --git a/libsrc/gprim/geomops.hpp b/libsrc/gprim/geomops.hpp index d08eef30..217c539b 100644 --- a/libsrc/gprim/geomops.hpp +++ b/libsrc/gprim/geomops.hpp @@ -29,10 +29,10 @@ namespace netgen - template - inline Point operator+ (const Point & a, const Vec & b) + template + inline Point operator+ (const Point & a, const Vec & b) { - Point res; + Point res; for (int i = 0; i < D; i++) res(i) = a(i) + b(i); return res; @@ -40,10 +40,10 @@ namespace netgen - template - inline Vec operator- (const Point & a, const Point & b) + template + inline Vec operator- (const Point & a, const Point & b) { - Vec res; + Vec res; for (int i = 0; i < D; i++) res(i) = a(i) - b(i); return res; @@ -69,10 +69,10 @@ namespace netgen - template - inline Vec operator* (double s, const Vec & b) + template + inline Vec operator* (T s, const Vec & b) { - Vec res; + Vec res; for (int i = 0; i < D; i++) res(i) = s * b(i); return res; diff --git a/libsrc/gprim/transform3d.hpp b/libsrc/gprim/transform3d.hpp index cd412433..49ce29ab 100644 --- a/libsrc/gprim/transform3d.hpp +++ b/libsrc/gprim/transform3d.hpp @@ -182,6 +182,9 @@ public: { to = m * from; } + + Point operator() (Point from) const { Point to; Transform(from, to); return to; } + Vec operator() (Vec from) const { Vec to; Transform(from, to); return to; } }; template diff --git a/libsrc/visualization/soldata.hpp b/libsrc/visualization/soldata.hpp index 1a7dfd3e..20028762 100644 --- a/libsrc/visualization/soldata.hpp +++ b/libsrc/visualization/soldata.hpp @@ -92,6 +92,18 @@ namespace netgen return res; } +#ifdef __AVX__ + virtual bool GetMultiSurfValue (size_t selnr, size_t facetnr, size_t npts, + const __m256d * xref, + const __m256d * x, + const __m256d * dxdxref, + __m256d * values) + { + cerr << "GetMultiSurfVaue not overloaded" << endl; + return false; + } +#endif + virtual bool GetSegmentValue (int segnr, double xref, double * values) { return false; } diff --git a/libsrc/visualization/vssolution.cpp b/libsrc/visualization/vssolution.cpp index fbe8d732..1ba4cad1 100644 --- a/libsrc/visualization/vssolution.cpp +++ b/libsrc/visualization/vssolution.cpp @@ -1256,8 +1256,16 @@ namespace netgen Array values(npt); Array mvalues(npt); +#ifdef __AVX__ + Array> > simd_pref ( (npt+SIMD::Size()-1)/SIMD::Size() ); + Array> > simd_points ( (npt+SIMD::Size()-1)/SIMD::Size() ); + Array> > simd_dxdxis ( (npt+SIMD::Size()-1)/SIMD::Size() ); + Array> > simd_nvs( (npt+SIMD::Size()-1)/SIMD::Size() ); + Array> simd_values( (npt+SIMD::Size()-1)/SIMD::Size() * sol->components); +#endif + if (sol && sol->draw_surface) mvalues.SetSize (npt * sol->components); - + Array > valuesc(npt); for (SurfaceElementIndex sei = 0; sei < nse; sei++) @@ -1436,6 +1444,93 @@ namespace netgen if ( el.GetType() == TRIG || el.GetType() == TRIG6 ) { +#ifdef __AVX_try_it_out__ + bool curved = curv.IsSurfaceElementCurved(sei); + + for (int iy = 0, ii = 0; iy <= n; iy++) + for (int ix = 0; ix <= n-iy; ix++, ii++) + pref[ii] = Point<2> (ix*invn, iy*invn); + + constexpr size_t simd_size = SIMD::Size(); + size_t simd_npt = (npt+simd_size-1)/simd_size; + + for (size_t i = 0; i < simd_npt; i++) + { + simd_pref[i](0).SIMD_function ([&] (size_t j) { size_t ii = i*simd_size+j; return (ii < npt) ? pref[ii](0) : 0; }, std::true_type()); + simd_pref[i](1).SIMD_function ([&] (size_t j) { size_t ii = i*simd_size+j; return (ii < npt) ? pref[ii](1) : 0; }, std::true_type()); + } + + if (curved) + { + mesh->GetCurvedElements(). + CalcMultiPointSurfaceTransformation<3> (sei, simd_npt, + &simd_pref[0](0), 2, + &simd_points[0](0), 3, + &simd_dxdxis[0](0,0), 6); + + for (size_t ii = 0; ii < simd_npt; ii++) + simd_nvs[ii] = Cross (simd_dxdxis[ii].Col(0), simd_dxdxis[ii].Col(1)).Normalize(); + } + else + { + Point<3,SIMD> p1 = mesh->Point (el[0]); + Point<3,SIMD> p2 = mesh->Point (el[1]); + Point<3,SIMD> p3 = mesh->Point (el[2]); + + Vec<3,SIMD> vx = p1-p3; + Vec<3,SIMD> vy = p2-p3; + for (size_t ii = 0; ii < simd_npt; ii++) + { + simd_points[ii] = p3 + simd_pref[ii](0) * vx + simd_pref[ii](1) * vy; + for (size_t j = 0; j < 3; j++) + { + simd_dxdxis[ii](j,0) = vx(j); + simd_dxdxis[ii](j,1) = vy(j); + } + } + + Vec<3,SIMD> nv = Cross (vx, vy).Normalize(); + for (size_t ii = 0; ii < simd_npt; ii++) + simd_nvs[ii] = nv; + } + + bool drawelem = false; + if (sol && sol->draw_surface) + { + drawelem = sol->solclass->GetMultiSurfValue (sei, -1, simd_npt, + &simd_pref[0](0).Data(), + &simd_points[0](0).Data(), + &simd_dxdxis[0](0).Data(), + &simd_values[0].Data()); + + for (size_t j = 0; j < sol->components; j++) + for (size_t i = 0; i < npt; i++) + mvalues[i*sol->components+j] = ((double*)&simd_values[j*simd_npt])[i]; + + if (usetexture == 2) + for (int ii = 0; ii < npt; ii++) + valuesc[ii] = ExtractValueComplex(sol, scalcomp, &mvalues[ii*sol->components]); + else + for (int ii = 0; ii < npt; ii++) + values[ii] = ExtractValue(sol, scalcomp, &mvalues[ii*sol->components]); + } + + for (size_t i = 0; i < npt; i++) + { + size_t ii = i/4; + size_t r = i%4; + for (int j = 0; j < 2; j++) + pref[i](j) = simd_pref[ii](j)[r]; + for (int j = 0; j < 3; j++) + points[i](j) = simd_points[ii](j)[r]; + for (int j = 0; j < 3; j++) + nvs[i](j) = simd_nvs[ii](j)[r]; + } + + if (deform) + for (int ii = 0; ii < npt; ii++) + points[ii] += GetSurfDeformation (sei, -1, pref[ii](0), pref[ii](1)); +#else bool curved = curv.IsSurfaceElementCurved(sei); for (int iy = 0, ii = 0; iy <= n; iy++) @@ -1492,7 +1587,8 @@ namespace netgen if (deform) for (int ii = 0; ii < npt; ii++) points[ii] += GetSurfDeformation (sei, -1, pref[ii](0), pref[ii](1)); - +#endif + int save_usetexture = usetexture; if (!drawelem) {