mirror of
https://github.com/NGSolve/netgen.git
synced 2025-01-13 14:40:35 +05:00
calling element-trafo with avx-types
This commit is contained in:
parent
98bcd12f6a
commit
392eee9177
@ -233,11 +233,11 @@ namespace netgen
|
|||||||
/// sxi ... step xi
|
/// sxi ... step xi
|
||||||
/// x ..... DIM_SPACE global coordinates
|
/// x ..... DIM_SPACE global coordinates
|
||||||
/// dxdxi...DIM_SPACE x DIM_EL Jacobian matrix (row major storage)
|
/// dxdxi...DIM_SPACE x DIM_EL Jacobian matrix (row major storage)
|
||||||
template <int DIM_EL, int DIM_SPACE>
|
template <int DIM_EL, int DIM_SPACE, typename T>
|
||||||
void MultiElementTransformation (int elnr, int npts,
|
void MultiElementTransformation (int elnr, int npts,
|
||||||
const double * xi, size_t sxi,
|
const T * xi, size_t sxi,
|
||||||
double * x, size_t sx,
|
T * x, size_t sx,
|
||||||
double * dxdxi, size_t sdxdxi) const;
|
T * dxdxi, size_t sdxdxi) const;
|
||||||
|
|
||||||
|
|
||||||
template <int DIM>
|
template <int DIM>
|
||||||
|
@ -537,10 +537,6 @@ namespace netgen
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <> DLL_HEADER void Ngx_Mesh ::
|
template <> DLL_HEADER void Ngx_Mesh ::
|
||||||
MultiElementTransformation<3,3> (int elnr, int npts,
|
MultiElementTransformation<3,3> (int elnr, int npts,
|
||||||
const double * xi, size_t sxi,
|
const double * xi, size_t sxi,
|
||||||
@ -602,6 +598,110 @@ namespace netgen
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __AVX2__
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
template<> DLL_HEADER void Ngx_Mesh ::
|
||||||
|
MultiElementTransformation<1,1> (int elnr, int npts,
|
||||||
|
const __m256d * xi, size_t sxi,
|
||||||
|
__m256d * x, size_t sx,
|
||||||
|
__m256d * dxdxi, size_t sdxdxi) const
|
||||||
|
{
|
||||||
|
cout << "multi-eltrafo simd called, 1,1,simd" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> DLL_HEADER void Ngx_Mesh ::
|
||||||
|
MultiElementTransformation<2,2> (int elnr, int npts,
|
||||||
|
const __m256d * xi, size_t sxi,
|
||||||
|
__m256d * x, size_t sx,
|
||||||
|
__m256d * dxdxi, size_t sdxdxi) const
|
||||||
|
{
|
||||||
|
for (int i = 0; i < npts; i++)
|
||||||
|
{
|
||||||
|
double hxi[4][2];
|
||||||
|
double hx[4][2];
|
||||||
|
double hdxdxi[4][4];
|
||||||
|
for (int j = 0; j < 4; j++)
|
||||||
|
for (int k = 0; k < 2; k++)
|
||||||
|
hxi[j][k] = ((double*)&(xi[k]))[j];
|
||||||
|
MultiElementTransformation<2,2> (elnr, 4, &hxi[0][0], 2, &hx[0][0], 2, &hdxdxi[0][0], 4);
|
||||||
|
for (int j = 0; j < 4; j++)
|
||||||
|
for (int k = 0; k < 2; k++)
|
||||||
|
((double*)&(x[k]))[j] = hx[j][k];
|
||||||
|
for (int j = 0; j < 4; j++)
|
||||||
|
for (int k = 0; k < 4; k++)
|
||||||
|
((double*)&(dxdxi[k]))[j] = hdxdxi[j][k];
|
||||||
|
|
||||||
|
xi += sxi;
|
||||||
|
x += sx;
|
||||||
|
dxdxi += sdxdxi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> DLL_HEADER void Ngx_Mesh ::
|
||||||
|
MultiElementTransformation<3,3> (int elnr, int npts,
|
||||||
|
const __m256d * xi, size_t sxi,
|
||||||
|
__m256d * x, size_t sx,
|
||||||
|
__m256d * dxdxi, size_t sdxdxi) const
|
||||||
|
{
|
||||||
|
for (int i = 0; i < npts; i++)
|
||||||
|
{
|
||||||
|
double hxi[4][3];
|
||||||
|
double hx[4][3];
|
||||||
|
double hdxdxi[4][9];
|
||||||
|
for (int j = 0; j < 4; j++)
|
||||||
|
for (int k = 0; k < 3; k++)
|
||||||
|
hxi[j][k] = ((double*)&(xi[k]))[j];
|
||||||
|
MultiElementTransformation<3,3> (elnr, 4, &hxi[0][0], 3, &hx[0][0], 3, &hdxdxi[0][0], 9);
|
||||||
|
for (int j = 0; j < 4; j++)
|
||||||
|
for (int k = 0; k < 3; k++)
|
||||||
|
((double*)&(x[k]))[j] = hx[j][k];
|
||||||
|
for (int j = 0; j < 4; j++)
|
||||||
|
for (int k = 0; k < 9; k++)
|
||||||
|
((double*)&(dxdxi[k]))[j] = hdxdxi[j][k];
|
||||||
|
|
||||||
|
xi += sxi;
|
||||||
|
x += sx;
|
||||||
|
dxdxi += sdxdxi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> DLL_HEADER void Ngx_Mesh ::
|
||||||
|
MultiElementTransformation<0,1> (int elnr, int npts,
|
||||||
|
const __m256d * xi, size_t sxi,
|
||||||
|
__m256d * x, size_t sx,
|
||||||
|
__m256d * dxdxi, size_t sdxdxi) const
|
||||||
|
{
|
||||||
|
cout << "multi-eltrafo simd called, 0,1,simd" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> DLL_HEADER void Ngx_Mesh ::
|
||||||
|
MultiElementTransformation<1,2> (int elnr, int npts,
|
||||||
|
const __m256d * xi, size_t sxi,
|
||||||
|
__m256d * x, size_t sx,
|
||||||
|
__m256d * dxdxi, size_t sdxdxi) const
|
||||||
|
{
|
||||||
|
cout << "multi-eltrafo simd called, 1,2,simd" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> DLL_HEADER void Ngx_Mesh ::
|
||||||
|
MultiElementTransformation<2,3> (int elnr, int npts,
|
||||||
|
const __m256d * xi, size_t sxi,
|
||||||
|
__m256d * x, size_t sx,
|
||||||
|
__m256d * dxdxi, size_t sdxdxi) const
|
||||||
|
{
|
||||||
|
cout << "multi-eltrafo simd called, 2,3,simd" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
DLL_HEADER int Ngx_Mesh :: FindElementOfPoint <1>
|
DLL_HEADER int Ngx_Mesh :: FindElementOfPoint <1>
|
||||||
(double * hp, double * lami,
|
(double * hp, double * lami,
|
||||||
|
@ -96,8 +96,8 @@ namespace netgen
|
|||||||
domain_bbox.Increase (0.01 * domain_bbox.Diam());
|
domain_bbox.Increase (0.01 * domain_bbox.Diam());
|
||||||
|
|
||||||
|
|
||||||
for (int qstep = 1; qstep <= 3; qstep++)
|
// for (int qstep = 1; qstep <= 3; qstep++)
|
||||||
// for (int qstep = 0; qstep <= 3; qstep++) // for hex-filling
|
for (int qstep = 0; qstep <= 0; qstep++) // for hex-filling
|
||||||
{
|
{
|
||||||
// cout << "openquads = " << mesh3d.HasOpenQuads() << endl;
|
// cout << "openquads = " << mesh3d.HasOpenQuads() << endl;
|
||||||
if (mesh3d.HasOpenQuads())
|
if (mesh3d.HasOpenQuads())
|
||||||
@ -108,8 +108,8 @@ namespace netgen
|
|||||||
switch (qstep)
|
switch (qstep)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
// rulefile = "/Users/joachim/gitlab/netgen/rules/hexa.rls";
|
rulefile = "/Users/joachim/gitlab/netgen/rules/hexa.rls";
|
||||||
rulep = hexrules;
|
// rulep = hexrules;
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
rulefile += "/rules/prisms2.rls";
|
rulefile += "/rules/prisms2.rls";
|
||||||
@ -125,8 +125,8 @@ namespace netgen
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Meshing3 meshing(rulefile);
|
Meshing3 meshing(rulefile);
|
||||||
Meshing3 meshing(rulep);
|
// Meshing3 meshing(rulep);
|
||||||
|
|
||||||
MeshingParameters mpquad = mp;
|
MeshingParameters mpquad = mp;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user