diff --git a/libsrc/core/simd.hpp b/libsrc/core/simd.hpp
index 3459e66d..4a646dca 100644
--- a/libsrc/core/simd.hpp
+++ b/libsrc/core/simd.hpp
@@ -46,19 +46,27 @@ namespace ngcore
   }
 #endif
 
-
   NETGEN_INLINE void SIMDTranspose (SIMD a1, SIMD a2, SIMD a3, SIMD a4,
-                             SIMD & b1, SIMD & b2, SIMD & b3, SIMD & b4)
+                                    SIMD & b1, SIMD & b2, SIMD & b3, SIMD & b4)
   {
-    SIMD h1,h2,h3,h4;
-    std::tie(h1,h2) = Unpack(a1,a2);
-    std::tie(h3,h4) = Unpack(a3,a4);
-    b1 = SIMD (h1.Lo(), h3.Lo());
-    b2 = SIMD (h2.Lo(), h4.Lo());
-    b3 = SIMD (h1.Hi(), h3.Hi());
-    b4 = SIMD (h2.Hi(), h4.Hi());
+    if constexpr (sizeof(a1.Lo()) == 16)
+      {
+        auto [h1,h2] = Unpack(a1,a2);
+        auto [h3,h4] = Unpack(a3,a4);
+        b1 = SIMD (h1.Lo(), h3.Lo());
+        b2 = SIMD (h2.Lo(), h4.Lo());
+        b3 = SIMD (h1.Hi(), h3.Hi());
+        b4 = SIMD (h2.Hi(), h4.Hi());
+      }
+    else
+      {
+        b1 = SIMD (a1[0], a2[0], a3[0], a4[0]);
+        b2 = SIMD (a1[1], a2[1], a3[1], a4[1]);
+        b3 = SIMD (a1[2], a2[2], a3[2], a4[2]);
+        b4 = SIMD (a1[3], a2[3], a3[3], a4[3]);
+      }
   }
-
+
   template
   NETGEN_INLINE auto HSum (SIMD s1, SIMD s2)
   {