complex FMA for SIMD<double>

This commit is contained in:
Joachim Schoeberl 2023-08-01 17:44:43 +02:00
parent a8e41734cf
commit 5c7d39b3fb
2 changed files with 25 additions and 0 deletions

View File

@ -126,8 +126,14 @@ namespace ngcore
return SIMD<double,4> (HSum(a,b), HSum(c,d));
}
// Returns a copy of `a` with its two lanes exchanged: (a0, a1) -> (a1, a0).
NETGEN_INLINE SIMD<double,2> SwapPairs (SIMD<double,2> a)
{
  auto lanes = a.Data();
  // shuffle indices 1,0 pick the second lane first, then the first lane
  return __builtin_shufflevector(lanes, lanes, 1, 0);
}
// a*b+c
NETGEN_INLINE SIMD<double,2> FMA (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> c)
{
@ -148,6 +154,16 @@ namespace ngcore
return FNMA(SIMD<double,2> (a), b, c);
}
// Complex fused multiply-add using the ARM complex-multiply intrinsics,
// see https://arxiv.org/pdf/1901.07294.pdf
//
// Computes c += a*b, where each vector holds complex values stored
// interleaved as (a0re, a0im, a1re, a1im, ...).
// The product is accumulated in two steps: first the contribution of the
// real parts of a, then the contribution of the imaginary parts of a.
// NOTE(review): vcmlaq_f64 / vcmlaq_rot90_f64 presumably require the
// Armv8.3-A complex-number extension - confirm the target build flags.
NETGEN_INLINE void FMAComplex (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> & c)
{
  auto va = a.Data();
  auto vb = b.Data();
  auto acc = vcmlaq_f64(c.Data(), va, vb);   // acc = c + are * b
  c = vcmlaq_rot90_f64(acc, va, vb);         // c = acc + i*aim * b
}
// Sum of two SIMD<double,2> values, formed by adding their underlying Data().
NETGEN_INLINE SIMD<double,2> operator+ (SIMD<double,2> a, SIMD<double,2> b)
{
  auto sum = a.Data() + b.Data();
  return sum;
}

View File

@ -559,6 +559,15 @@ namespace ngcore
sum = FNMA(a,b,sum);
}
// Generic complex multiply-add: c += a*b, with complex values stored
// interleaved as (a0re, a0im, a1re, a1im, ...).
// NOTE(review): relies on Unpack(a,a) yielding the duplicated real parts and
// the duplicated imaginary parts of a, and on FMAddSub combining its
// arguments with alternating sign per lane - confirm against their definitions.
template <int N>
void FMAComplex (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & c)
{
  // b with re/im exchanged inside every complex entry
  SIMD<double,N> b_swapped = SwapPairs(b);
  auto [a_re, a_im] = Unpack(a, a);
  // cross terms: imaginary part of a times the swapped b
  SIMD<double,N> cross = a_im * b_swapped;
  c += FMAddSub (a_re, b, cross);
}
// Free-function accessor: returns the result of a.Get<i>() for the
// compile-time index i.
template <int i, typename T, int N>
T get(SIMD<T,N> a)
{
  T entry = a.template Get<i>();
  return entry;
}