From 392eee9177a715c1d0291206a982d0d14be8449d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?=
Date: Sun, 10 Apr 2016 06:36:05 +0200
Subject: [PATCH] calling element-trafo with avx-types

---
Notes (text between the "---" line and the diffstat is not part of the
commit message):

* MultiElementTransformation gets an additional template parameter T, so the
  coordinate arrays may be double or __m256d.
* The __m256d specializations for <2,2> and <3,3> unpack the 4 lanes of every
  SIMD point into scalar arrays, call the double version for 4 points and
  pack the results back. npts counts SIMD packs; sxi / sx / sdxdxi are
  strides in __m256d elements.
* The __m256d specializations for <1,1>, <0,1>, <1,2> and <2,3> are
  placeholders: they only print a message and do not write x or dxdxi.
* NOTE(review): judging by the hunk context, the #include of <immintrin.h>
  lands inside namespace netgen - confirm whether it should move to the
  include block at the top of nginterface_v2.cpp.
* The hunk for libsrc/meshing/meshfunc.cpp was dropped from this patch: it
  hard-coded the rule file "/Users/joachim/gitlab/netgen/rules/hexa.rls" and
  restricted the open-quads loop to qstep 0, which is unrelated to the
  element-trafo change.
* Indentation of this patch was reconstructed; apply with
  --ignore-whitespace if the context does not match.

 libsrc/include/nginterface_v2.hpp   |   8 +--
 libsrc/interface/nginterface_v2.cpp | 122 ++++++++++++++++++++++++++--
 2 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/libsrc/include/nginterface_v2.hpp b/libsrc/include/nginterface_v2.hpp
index 69045016..5e1362e2 100644
--- a/libsrc/include/nginterface_v2.hpp
+++ b/libsrc/include/nginterface_v2.hpp
@@ -233,11 +233,11 @@ namespace netgen
     /// sxi ... step xi
     /// x ..... DIM_SPACE global coordinates
     /// dxdxi...DIM_SPACE x DIM_EL Jacobian matrix (row major storage)
-    template <int DIM_EL, int DIM_SPACE>
+    template <int DIM_EL, int DIM_SPACE, typename T>
     void MultiElementTransformation (int elnr, int npts,
-                                     const double * xi, size_t sxi,
-                                     double * x, size_t sx,
-                                     double * dxdxi, size_t sdxdxi) const;
+                                     const T * xi, size_t sxi,
+                                     T * x, size_t sx,
+                                     T * dxdxi, size_t sdxdxi) const;
 
 
     template <int DIM>
diff --git a/libsrc/interface/nginterface_v2.cpp b/libsrc/interface/nginterface_v2.cpp
index 5372bcd3..6b918e54 100644
--- a/libsrc/interface/nginterface_v2.cpp
+++ b/libsrc/interface/nginterface_v2.cpp
@@ -535,10 +535,6 @@ namespace netgen
 
 
 
-
-
-
-
 
 
   template <> DLL_HEADER void Ngx_Mesh ::
@@ -602,6 +598,124 @@ namespace netgen
 
 
 
+
+#ifdef __AVX2__
+#include <immintrin.h>
+
+  // SIMD variants of MultiElementTransformation: each __m256d carries one
+  // coordinate of 4 points, npts counts such packs of 4 points, and the
+  // strides sxi / sx / sdxdxi are measured in __m256d elements.
+
+  template<> DLL_HEADER void Ngx_Mesh ::
+  MultiElementTransformation<1,1> (int elnr, int npts,
+                                   const __m256d * xi, size_t sxi,
+                                   __m256d * x, size_t sx,
+                                   __m256d * dxdxi, size_t sdxdxi) const
+  {
+    // placeholder: only reports the call, x and dxdxi are not written
+    cout << "multi-eltrafo simd called, 1,1,simd" << endl;
+  }
+
+  template<> DLL_HEADER void Ngx_Mesh ::
+  MultiElementTransformation<2,2> (int elnr, int npts,
+                                   const __m256d * xi, size_t sxi,
+                                   __m256d * x, size_t sx,
+                                   __m256d * dxdxi, size_t sdxdxi) const
+  {
+    for (int i = 0; i < npts; i++)
+      {
+        double hxi[4][2];
+        double hx[4][2];
+        double hdxdxi[4][4];
+        // unpack: lane j of xi[k] is coordinate k of point j
+        for (int j = 0; j < 4; j++)
+          for (int k = 0; k < 2; k++)
+            hxi[j][k] = ((double*)&(xi[k]))[j];
+        // evaluate the 4 points with the double version
+        MultiElementTransformation<2,2> (elnr, 4, &hxi[0][0], 2, &hx[0][0], 2, &hdxdxi[0][0], 4);
+        // pack the results back into the SIMD lanes
+        for (int j = 0; j < 4; j++)
+          for (int k = 0; k < 2; k++)
+            ((double*)&(x[k]))[j] = hx[j][k];
+        for (int j = 0; j < 4; j++)
+          for (int k = 0; k < 4; k++)
+            ((double*)&(dxdxi[k]))[j] = hdxdxi[j][k];
+
+        xi += sxi;
+        x += sx;
+        dxdxi += sdxdxi;
+      }
+  }
+
+  template<> DLL_HEADER void Ngx_Mesh ::
+  MultiElementTransformation<3,3> (int elnr, int npts,
+                                   const __m256d * xi, size_t sxi,
+                                   __m256d * x, size_t sx,
+                                   __m256d * dxdxi, size_t sdxdxi) const
+  {
+    for (int i = 0; i < npts; i++)
+      {
+        double hxi[4][3];
+        double hx[4][3];
+        double hdxdxi[4][9];
+        // unpack: lane j of xi[k] is coordinate k of point j
+        for (int j = 0; j < 4; j++)
+          for (int k = 0; k < 3; k++)
+            hxi[j][k] = ((double*)&(xi[k]))[j];
+        // evaluate the 4 points with the double version
+        MultiElementTransformation<3,3> (elnr, 4, &hxi[0][0], 3, &hx[0][0], 3, &hdxdxi[0][0], 9);
+        // pack the results back into the SIMD lanes
+        for (int j = 0; j < 4; j++)
+          for (int k = 0; k < 3; k++)
+            ((double*)&(x[k]))[j] = hx[j][k];
+        for (int j = 0; j < 4; j++)
+          for (int k = 0; k < 9; k++)
+            ((double*)&(dxdxi[k]))[j] = hdxdxi[j][k];
+
+        xi += sxi;
+        x += sx;
+        dxdxi += sdxdxi;
+      }
+  }
+
+  template<> DLL_HEADER void Ngx_Mesh ::
+  MultiElementTransformation<0,1> (int elnr, int npts,
+                                   const __m256d * xi, size_t sxi,
+                                   __m256d * x, size_t sx,
+                                   __m256d * dxdxi, size_t sdxdxi) const
+  {
+    // placeholder: only reports the call, x and dxdxi are not written
+    cout << "multi-eltrafo simd called, 0,1,simd" << endl;
+  }
+
+  template<> DLL_HEADER void Ngx_Mesh ::
+  MultiElementTransformation<1,2> (int elnr, int npts,
+                                   const __m256d * xi, size_t sxi,
+                                   __m256d * x, size_t sx,
+                                   __m256d * dxdxi, size_t sdxdxi) const
+  {
+    // placeholder: only reports the call, x and dxdxi are not written
+    cout << "multi-eltrafo simd called, 1,2,simd" << endl;
+  }
+
+  template<> DLL_HEADER void Ngx_Mesh ::
+  MultiElementTransformation<2,3> (int elnr, int npts,
+                                   const __m256d * xi, size_t sxi,
+                                   __m256d * x, size_t sx,
+                                   __m256d * dxdxi, size_t sdxdxi) const
+  {
+    // placeholder: only reports the call, x and dxdxi are not written
+    cout << "multi-eltrafo simd called, 2,3,simd" << endl;
+  }
+
+#endif
+
+
+
+
+
+
+
   template <>
   DLL_HEADER int Ngx_Mesh :: FindElementOfPoint <1>
   (double * hp, double * lami,