mirror of
https://github.com/NGSolve/netgen.git
synced 2024-12-24 21:10:33 +05:00
complex FMA for SIMD<double>
This commit is contained in:
parent
a8e41734cf
commit
5c7d39b3fb
@ -126,8 +126,14 @@ namespace ngcore
|
|||||||
return SIMD<double,4> (HSum(a,b), HSum(c,d));
|
return SIMD<double,4> (HSum(a,b), HSum(c,d));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns a vector whose two lanes are those of `a` in exchanged order:
// the shuffle indices (1, 0) select lane 1 first, then lane 0.
NETGEN_INLINE SIMD<double,2> SwapPairs (SIMD<double,2> a)
{
  const auto lanes = a.Data();
  return __builtin_shufflevector(lanes, lanes, 1, 0);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// a*b+c
|
// a*b+c
|
||||||
NETGEN_INLINE SIMD<double,2> FMA (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> c)
|
NETGEN_INLINE SIMD<double,2> FMA (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> c)
|
||||||
{
|
{
|
||||||
@ -148,6 +154,16 @@ namespace ngcore
|
|||||||
return FNMA(SIMD<double,2> (a), b, c);
|
return FNMA(SIMD<double,2> (a), b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ARM complex mult:
|
||||||
|
// https://arxiv.org/pdf/1901.07294.pdf
|
||||||
|
// c += a*b (a0re, a0im, a1re, a1im, ...),
|
||||||
|
NETGEN_INLINE void FMAComplex (SIMD<double,2> a, SIMD<double,2> b, SIMD<double,2> & c)
|
||||||
|
{
|
||||||
|
auto tmp = vcmlaq_f64(c.Data(), a.Data(), b.Data()); // are * b
|
||||||
|
c = vcmlaq_rot90_f64(tmp, a.Data(), b.Data()); // += i*aim * b
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Sum of two 2-wide double vectors: adds the underlying Data() values of
// `a` and `b` and converts the result back to SIMD<double,2>.
NETGEN_INLINE SIMD<double,2> operator+ (SIMD<double,2> a, SIMD<double,2> b)
{
  return a.Data() + b.Data();
}
|
||||||
|
|
||||||
|
@ -559,6 +559,15 @@ namespace ngcore
|
|||||||
sum = FNMA(a,b,sum);
|
sum = FNMA(a,b,sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// c += a*b (a0re, a0im, a1re, a1im, ...),
|
||||||
|
template <int N>
|
||||||
|
void FMAComplex (SIMD<double,N> a, SIMD<double,N> b, SIMD<double,N> & c)
|
||||||
|
{
|
||||||
|
auto [are, aim] = Unpack(a, a);
|
||||||
|
SIMD<double,N> bswap = SwapPairs(b);
|
||||||
|
SIMD<double,N> aim_bswap = aim*bswap;
|
||||||
|
c += FMAddSub (are, b, aim_bswap);
|
||||||
|
}
|
||||||
|
|
||||||
template <int i, typename T, int N>
|
template <int i, typename T, int N>
|
||||||
T get(SIMD<T,N> a) { return a.template Get<i>(); }
|
T get(SIMD<T,N> a) { return a.template Get<i>(); }
|
||||||
|
Loading…
Reference in New Issue
Block a user