mirror of
https://github.com/NGSolve/netgen.git
synced 2025-01-11 21:50:34 +05:00
complex FMA for SIMD<double>
This commit is contained in:
parent
a8e41734cf
commit
5c7d39b3fb
@ -127,6 +127,12 @@ namespace ngcore
|
||||
}
|
||||
|
||||
|
||||
// Exchange the two double lanes of a: (a0, a1) -> (a1, a0).
NETGEN_INLINE SIMD<double,2> SwapPairs (SIMD<double,2> a)
{
  auto lanes = a.Data();
  // shuffle indices 1, 0 select lane 1 first and lane 0 second
  return __builtin_shufflevector(lanes, lanes, 1, 0);
}
|
||||
|
||||
|
||||
|
||||
// a*b+c
|
||||
NETGEN_INLINE SIMD<double,2> FMA (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> c)
|
||||
@ -148,6 +154,16 @@ namespace ngcore
|
||||
return FNMA(SIMD<double,2> (a), b, c);
|
||||
}
|
||||
|
||||
// ARM complex mult:
|
||||
// https://arxiv.org/pdf/1901.07294.pdf
|
||||
// c += a*b (a0re, a0im, a1re, a1im, ...),
|
||||
NETGEN_INLINE void FMAComplex (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> & c)
|
||||
{
|
||||
auto tmp = vcmlaq_f64(c.Data(), a.Data(), b.Data()); // are * b
|
||||
c = vcmlaq_rot90_f64(tmp, a.Data(), b.Data()); // += i*aim * b
|
||||
}
|
||||
|
||||
|
||||
// Sum of two SIMD<double,2> values, formed by adding the underlying
// vector registers returned by Data().
NETGEN_INLINE SIMD<double,2> operator+ (SIMD<double,2> a, SIMD<double,2> b)
{
  return a.Data() + b.Data();
}
|
||||
|
||||
|
@ -559,6 +559,15 @@ namespace ngcore
|
||||
sum = FNMA(a,b,sum);
|
||||
}
|
||||
|
||||
// c += a*b (a0re, a0im, a1re, a1im, ...),
|
||||
template <int N>
|
||||
void FMAComplex (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & c)
|
||||
{
|
||||
auto [are, aim] = Unpack(a, a);
|
||||
SIMD<double,N> bswap = SwapPairs(b);
|
||||
SIMD<double,N> aim_bswap = aim*bswap;
|
||||
c += FMAddSub (are, b, aim_bswap);
|
||||
}
|
||||
|
||||
template <int i, typename T, int N>
|
||||
T get(SIMD<T,N> a) { return a.template Get<i>(); }
|
||||
|
Loading…
Reference in New Issue
Block a user