calling element-trafo with avx-types

This commit is contained in:
Joachim Schöberl 2016-04-10 06:36:05 +02:00
parent 98bcd12f6a
commit 392eee9177
3 changed files with 114 additions and 14 deletions

View File

@ -233,11 +233,11 @@ namespace netgen
/// sxi ... step xi
/// x ..... DIM_SPACE global coordinates
/// dxdxi...DIM_SPACE x DIM_EL Jacobian matrix (row major storage)
template <int DIM_EL, int DIM_SPACE>
template <int DIM_EL, int DIM_SPACE, typename T>
void MultiElementTransformation (int elnr, int npts,
const double * xi, size_t sxi,
double * x, size_t sx,
double * dxdxi, size_t sdxdxi) const;
const T * xi, size_t sxi,
T * x, size_t sx,
T * dxdxi, size_t sdxdxi) const;
template <int DIM>

View File

@ -535,10 +535,6 @@ namespace netgen
template <> DLL_HEADER void Ngx_Mesh ::
@ -602,6 +598,110 @@ namespace netgen
#ifdef __AVX2__
#include <immintrin.h>
template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<1,1> (int elnr, int npts,
const __m256d * xi, size_t sxi,
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
cout << "multi-eltrafo simd called, 1,1,simd" << endl;
}
template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<2,2> (int elnr, int npts,
const __m256d * xi, size_t sxi,
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
for (int i = 0; i < npts; i++)
{
double hxi[4][2];
double hx[4][2];
double hdxdxi[4][4];
for (int j = 0; j < 4; j++)
for (int k = 0; k < 2; k++)
hxi[j][k] = ((double*)&(xi[k]))[j];
MultiElementTransformation<2,2> (elnr, 4, &hxi[0][0], 2, &hx[0][0], 2, &hdxdxi[0][0], 4);
for (int j = 0; j < 4; j++)
for (int k = 0; k < 2; k++)
((double*)&(x[k]))[j] = hx[j][k];
for (int j = 0; j < 4; j++)
for (int k = 0; k < 4; k++)
((double*)&(dxdxi[k]))[j] = hdxdxi[j][k];
xi += sxi;
x += sx;
dxdxi += sdxdxi;
}
}
template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<3,3> (int elnr, int npts,
const __m256d * xi, size_t sxi,
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
for (int i = 0; i < npts; i++)
{
double hxi[4][3];
double hx[4][3];
double hdxdxi[4][9];
for (int j = 0; j < 4; j++)
for (int k = 0; k < 3; k++)
hxi[j][k] = ((double*)&(xi[k]))[j];
MultiElementTransformation<3,3> (elnr, 4, &hxi[0][0], 3, &hx[0][0], 3, &hdxdxi[0][0], 9);
for (int j = 0; j < 4; j++)
for (int k = 0; k < 3; k++)
((double*)&(x[k]))[j] = hx[j][k];
for (int j = 0; j < 4; j++)
for (int k = 0; k < 9; k++)
((double*)&(dxdxi[k]))[j] = hdxdxi[j][k];
xi += sxi;
x += sx;
dxdxi += sdxdxi;
}
}
template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<0,1> (int elnr, int npts,
const __m256d * xi, size_t sxi,
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
cout << "multi-eltrafo simd called, 0,1,simd" << endl;
}
template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<1,2> (int elnr, int npts,
const __m256d * xi, size_t sxi,
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
cout << "multi-eltrafo simd called, 1,2,simd" << endl;
}
template<> DLL_HEADER void Ngx_Mesh ::
MultiElementTransformation<2,3> (int elnr, int npts,
const __m256d * xi, size_t sxi,
__m256d * x, size_t sx,
__m256d * dxdxi, size_t sdxdxi) const
{
cout << "multi-eltrafo simd called, 2,3,simd" << endl;
}
#endif
template <>
DLL_HEADER int Ngx_Mesh :: FindElementOfPoint <1>
(double * hp, double * lami,

View File

@ -96,8 +96,8 @@ namespace netgen
domain_bbox.Increase (0.01 * domain_bbox.Diam());
for (int qstep = 1; qstep <= 3; qstep++)
// for (int qstep = 0; qstep <= 3; qstep++) // for hex-filling
// for (int qstep = 1; qstep <= 3; qstep++)
for (int qstep = 0; qstep <= 0; qstep++) // for hex-filling
{
// cout << "openquads = " << mesh3d.HasOpenQuads() << endl;
if (mesh3d.HasOpenQuads())
@ -108,8 +108,8 @@ namespace netgen
switch (qstep)
{
case 0:
// rulefile = "/Users/joachim/gitlab/netgen/rules/hexa.rls";
rulep = hexrules;
rulefile = "/Users/joachim/gitlab/netgen/rules/hexa.rls";
// rulep = hexrules;
break;
case 1:
rulefile += "/rules/prisms2.rls";
@ -125,8 +125,8 @@ namespace netgen
break;
}
// Meshing3 meshing(rulefile);
Meshing3 meshing(rulep);
Meshing3 meshing(rulefile);
// Meshing3 meshing(rulep);
MeshingParameters mpquad = mp;