diff --git a/libsrc/core/taskmanager.hpp b/libsrc/core/taskmanager.hpp index af2ae0c7..c8373627 100644 --- a/libsrc/core/taskmanager.hpp +++ b/libsrc/core/taskmanager.hpp @@ -1016,6 +1016,67 @@ public: #endif // USE_NUMA + // Helper function to calculate coloring of a set of indices for parallel processing of independent elements/points/etc. + // Assigns a color to each of colors.Size() elements, such that two elements with the same color don't share a common 'dof', + // the mapping from element to dofs is provided by the function getDofs(int) -> iterable + // + // Returns the number of used colors + template + int ComputeColoring( FlatArray colors, size_t ndofs, Tmask const & getDofs) + { + static_assert(sizeof(unsigned int)==4, "Adapt type of mask array"); + auto n = colors.Size(); + + Array mask(ndofs); + + int colored_blocks = 0; + + // We are coloring with 32 colors at once and use each bit to mask conflicts + unsigned int check = 0; + unsigned int checkbit = 0; + + int current_color = 0; + colors = -1; + int maxcolor = 0; + + while(colored_blocks-1) continue; + check = 0; + const auto & dofs = getDofs(i); + + // Check if adjacent dofs are already marked by current color + for (auto dof : dofs) + check|=mask[dof]; + + // Did we find a free color? + if(check != 0xFFFFFFFF) + { + checkbit = 1; + int color = current_color; + // find the actual color, which is free (out of 32) + while (check & checkbit) + { + color++; + checkbit *= 2; + } + colors[i] = color; + maxcolor = color > maxcolor ? color : maxcolor; + colored_blocks++; + // mask all adjacent dofs with the found color + for (auto dof : dofs) + mask[dof] |= checkbit; + } + } + current_color+=32; + } + return maxcolor+1; + } + + } diff --git a/libsrc/core/utils.hpp b/libsrc/core/utils.hpp index 3645ea18..e1da760c 100644 --- a/libsrc/core/utils.hpp +++ b/libsrc/core/utils.hpp @@ -1,6 +1,7 @@ #ifndef NETGEN_CORE_UTILS_HPP #define NETGEN_CORE_UTILS_HPP +#include #include #include #include @@ -119,6 +120,41 @@ namespace ngcore return std::equal(end.rbegin(), end.rend(), str.rbegin()); } + template + NETGEN_INLINE std::atomic & AsAtomic (T & d) + { + return reinterpret_cast&> (d); + } + + NETGEN_INLINE double AtomicAdd( double & sum, double val ) + { + std::atomic & asum = AsAtomic(sum); + double current = asum.load(); + while (!asum.compare_exchange_weak(current, current + val)) + ; + return current; + } + + template + NETGEN_INLINE T AtomicMin( T & minval, T val ) + { + std::atomic & aminval = AsAtomic(minval); + T current = aminval.load(); + while (!aminval.compare_exchange_weak(current, std::min(current, val))) + ; + return current; + } + + template + NETGEN_INLINE T AtomicMax( T & maxval, T val ) + { + std::atomic & amaxval = AsAtomic(maxval); + T current = amaxval.load(); + while (!amaxval.compare_exchange_weak(current, std::max(current, val))) + ; + return current; + } + } // namespace ngcore #endif // NETGEN_CORE_UTILS_HPP diff --git a/tests/catch/CMakeLists.txt b/tests/catch/CMakeLists.txt index 1dc6dd10..3b727af8 100644 --- a/tests/catch/CMakeLists.txt +++ b/tests/catch/CMakeLists.txt @@ -27,6 +27,7 @@ endmacro() add_unit_test(archive archive.cpp) add_unit_test(symboltable symboltable.cpp) +add_unit_test(utils utils.cpp) add_unit_test(version version.cpp) endif(ENABLE_UNIT_TESTS) diff --git a/tests/catch/utils.cpp b/tests/catch/utils.cpp new file mode 100644 index 00000000..4258b5b9 --- /dev/null +++ b/tests/catch/utils.cpp @@ -0,0 +1,67 @@ + +#include "catch.hpp" +#include +using namespace ngcore; +using namespace std; + + +long shuffle(long N, long i) { + // Shuffle the numbers using multiplication with a prime number to force many updates of min, max + constexpr long P = 101; + return (N/2 + i*P) % N; +} + +void testThreading(int n_threads) +{ + TaskManager::SetNumThreads(n_threads); + n_threads = EnterTaskManager(); + + constexpr long N = 100000; + + + SECTION( "atomic operations" ) { + long i_min = 2*N; + long i_max = 0; + long i_sum = 0; + + double d_min = 1e100; + double d_max = 0.0; + double d_sum = 0.0; + + ParallelFor( Range(N), [&] (long i) { + AtomicMin(i_min, shuffle(N,i)); + }); + REQUIRE( i_min==0 ); + + ParallelFor( Range(N), [&] (long i) { + AtomicMax(i_max, shuffle(N,i)); + }); + REQUIRE( i_max==N-1 ); + + ParallelFor( Range(N), [&] (long i) { + AsAtomic(i_sum) += i; + }); + REQUIRE( i_sum==N*(N-1)/2 ); + + ParallelFor( Range(N), [&] (double i) { + AtomicMin(d_min, static_cast(shuffle(N,i))); + }); + REQUIRE( d_min==0 ); + + ParallelFor( Range(N), [&] (double i) { + AtomicMax(d_max, static_cast(shuffle(N,i))); + }); + REQUIRE( d_max==N-1 ); + + ParallelFor( Range(N), [&] (double i) { + AtomicAdd(d_sum, i); + }); + REQUIRE( d_sum==N*(N-1)/2 ); + + } + ExitTaskManager(n_threads); +} + +TEST_CASE("Threading - 1 Thread") { testThreading(1); } +TEST_CASE("Threading - 2 Thread") { testThreading(2); } +TEST_CASE("Threading - 8 Thread") { testThreading(8); }