fnma intrinsic for avx512

This commit is contained in:
Joachim Schöberl 2020-12-22 13:06:08 +01:00
parent c1c10174be
commit ea7f6c1e94
3 changed files with 28 additions and 3 deletions

View File

@ -184,6 +184,14 @@ namespace ngcore
{
return _mm256_fmadd_pd (_mm256_set1_pd(a), b.Data(), c.Data());
}
// Fused negated multiply-add on 4 packed doubles.
// Forwards the raw registers of a, b and c to _mm256_fnmadd_pd, which yields
// -(a*b) + c per lane according to the Intel intrinsics reference.
NETGEN_INLINE SIMD<double,4> FNMA (SIMD<double,4> a, SIMD<double,4> b, SIMD<double,4> c)
{
  const auto lhs_factor = a.Data();
  const auto rhs_factor = b.Data();
  const auto addend = c.Data();
  return _mm256_fnmadd_pd (lhs_factor, rhs_factor, addend);
}
// Fused negated multiply-add with a scalar first factor.
// The scalar a is replicated into every lane with _mm256_set1_pd before being
// handed, together with the raw registers of b and c, to _mm256_fnmadd_pd
// (-(a*b) + c per lane according to the Intel intrinsics reference).
NETGEN_INLINE SIMD<double,4> FNMA (const double & a, SIMD<double,4> b, SIMD<double,4> c)
{
  const auto splat_a = _mm256_set1_pd(a);
  return _mm256_fnmadd_pd (splat_a, b.Data(), c.Data());
}
#endif
#if defined(__FMA__) && !defined(__AVX512F__)

View File

@ -234,6 +234,16 @@ namespace ngcore
{
return _mm512_fmadd_pd (_mm512_set1_pd(a), b.Data(), c.Data());
}
// Fused negated multiply-add on 8 packed doubles.
// Forwards the raw registers of a, b and c to _mm512_fnmadd_pd, which yields
// -(a*b) + c per lane according to the Intel intrinsics reference.
NETGEN_INLINE SIMD<double,8> FNMA (SIMD<double,8> a, SIMD<double,8> b, SIMD<double,8> c)
{
  const auto lhs_factor = a.Data();
  const auto rhs_factor = b.Data();
  const auto addend = c.Data();
  return _mm512_fnmadd_pd (lhs_factor, rhs_factor, addend);
}
// Fused negated multiply-add with a scalar first factor.
// The scalar a is replicated into every lane with _mm512_set1_pd before being
// handed, together with the raw registers of b and c, to _mm512_fnmadd_pd
// (-(a*b) + c per lane according to the Intel intrinsics reference).
NETGEN_INLINE SIMD<double,8> FNMA (const double & a, SIMD<double,8> b, SIMD<double,8> c)
{
  const auto splat_a = _mm512_set1_pd(a);
  return _mm512_fnmadd_pd (splat_a, b.Data(), c.Data());
}
}
#endif // NETGEN_CORE_SIMD_AVX512_HPP

View File

@ -513,11 +513,17 @@ namespace ngcore
}
// Generic multiply-add fallback: evaluates c + a*b using whatever operator*
// and operator+ the argument types provide.
// NOTE(review): this has the same value as a*b + c only when the operand
// types' addition is commutative - confirm for any custom types passed here.
// The product is kept inside a single expression so the compiler remains free
// to contract it into a hardware fused multiply-add.
template <typename T1, typename T2, typename T3>
NETGEN_INLINE auto FMA(T1 a, T2 b, T3 c)
{
  return c+a*b;
}
// Generic negated multiply-add fallback: evaluates c - a*b using whatever
// operator* and operator- the argument types provide.
// The product stays inside one expression on purpose, so the evaluation is
// exactly the single-expression form and nothing is materialised in between.
template <typename T1, typename T2, typename T3>
NETGEN_INLINE auto FNMA(T1 a, T2 b, T3 c)
{
  return c - (a * b);
}
// update form of fma
@ -531,7 +537,8 @@ namespace ngcore
// In-place negated multiply-add: replaces sum with FNMA(a, b, sum).
// The update is performed exactly once. An additional `sum -= a*b;` ahead of
// the FNMA call would subtract the product a second time.
// Which FNMA overload is selected depends on N and on the overloads visible
// at the point of instantiation.
template <int N>
void FNMAasm (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & sum)
{
  sum = FNMA(a,b,sum);
}