Merge branch 'utils_cleanup' into 'master'

Utility functions for multithreading. See merge request jschoeberl/netgen!192
2024-12-26 05:50:32 +05:00 · 2019-08-08 15:08:33 +00:00 · 2019-08-08 15:08:33 +00:00 · cfa4777318
commit cfa4777318
parent e8960ebae1 a99ea4ae85
4 changed files with 165 additions and 0 deletions
--- a/libsrc/core/taskmanager.hpp
+++ b/libsrc/core/taskmanager.hpp
@ -1016,6 +1016,67 @@ public:
  
 #endif // USE_NUMA

+  //   Helper function to calculate a coloring of a set of indices for parallel processing of independent elements/points/etc.
+  //   Assigns a color to each of the colors.Size() elements, such that two elements with the same color don't share a common 'dof'.
+  //
+  //   colors   output array with one entry per element; every entry is overwritten with the color (>= 0) of that element
+  //   ndofs    size of the internal mask array, i.e. every dof returned by getDofs must be smaller than ndofs
+  //   getDofs  mapping from element to dofs, getDofs(i) -> iterable<int>; called once per sweep for each still uncolored element
+  //
+  //   Returns the largest assigned color + 1, i.e. the number of used colors (1 if colors is empty)
+  template <typename Tmask>
+  int ComputeColoring( FlatArray<int> colors, size_t ndofs, Tmask const & getDofs)
+  {
+    static_assert(sizeof(unsigned int)==4, "Adapt type of mask array");
+    const size_t n = colors.Size();
+
+    // One 32-bit word per dof: bit k is set if color (current_color + k) is already used by an element touching that dof
+    Array<unsigned int> mask(ndofs);
+
+    // Same type as n, so the loop condition below does not mix signed and unsigned values
+    size_t colored_blocks = 0;
+
+    int current_color = 0;
+    colors = -1;   // -1 marks "not colored yet"
+    int maxcolor = 0;
+
+    // We are coloring with 32 colors at once and use each bit to mask conflicts.
+    // The first uncolored element of a sweep always sees an empty mask and gets colored, so every sweep makes progress.
+    while(colored_blocks<n)
+    {
+        mask = 0;
+        for (auto i : Range(n) )
+        {
+            if(colors[i]>-1) continue;
+            const auto & dofs = getDofs(i);
+
+            // Collect the colors (bits) that are already used on any dof of this element
+            unsigned int check = 0;
+            for (auto dof : dofs)
+                check|=mask[dof];
+
+            // All 32 colors of this sweep are taken: leave the element for a later sweep
+            if(check == 0xFFFFFFFF) continue;
+
+            // find the actual color, which is free (out of 32): the lowest bit not set in check
+            unsigned int checkbit = 1;
+            int color = current_color;
+            while (check & checkbit)
+            {
+                color++;
+                checkbit *= 2;
+            }
+            colors[i] = color;
+            maxcolor = color > maxcolor ? color : maxcolor;
+            colored_blocks++;
+            // mask all adjacent dofs with the found color
+            for (auto dof : dofs)
+                mask[dof] |= checkbit;
+        }
+        current_color+=32;
+    }
+    return maxcolor+1;
+  }
+
+
 }


--- a/libsrc/core/utils.hpp
+++ b/libsrc/core/utils.hpp
@ -1,6 +1,7 @@
 #ifndef NETGEN_CORE_UTILS_HPP
 #define NETGEN_CORE_UTILS_HPP

+#include <atomic>
 #include <chrono>
 #include <map>
 #include <ostream>
@ -119,6 +120,41 @@ namespace ngcore
    return std::equal(end.rbegin(), end.rend(), str.rbegin());
  }

+  // Reinterprets a plain object as std::atomic<T>, so that atomic operations can be applied to it in place.
+  // The cast is only meaningful if std::atomic<T> has the same representation as T; the size is checked at compile time.
+  // Returns a reference to the very same storage as d, no copy is made.
+  template<typename T>
+  NETGEN_INLINE std::atomic<T> & AsAtomic (T & d)
+  {
+    static_assert(sizeof(std::atomic<T>) == sizeof(T), "std::atomic<T> must have the same size as T to alias it");
+    return reinterpret_cast<std::atomic<T>&> (d);
+  }
+
+  // Atomically adds val to sum, using a compare-exchange retry loop on the std::atomic view of sum.
+  // Returns the value sum had immediately before this addition took effect.
+  NETGEN_INLINE double AtomicAdd( double & sum, double val )
+  {
+      auto & target = AsAtomic(sum);
+      double expected = target.load();
+      for (;;)
+      {
+          // On failure compare_exchange_weak stores the value it found into expected, so the retry works on fresh data
+          if (target.compare_exchange_weak(expected, expected + val))
+              return expected;
+      }
+  }
+
+  // Atomically lowers minval to val if val is smaller than the stored value.
+  // Returns the value of minval last observed by this call (the value before this call's update, if it made one).
+  template<typename T>
+  NETGEN_INLINE T AtomicMin( T & minval, T val )
+  {
+      std::atomic<T> & aminval = AsAtomic(minval);
+      T current = aminval.load();
+      // Only attempt the exchange while val would actually lower the stored value, this avoids needless writes
+      // to the shared location. A failed compare_exchange_weak reloads current, so the test is repeated on retry.
+      while (val < current && !aminval.compare_exchange_weak(current, val))
+          ;
+      return current;
+  }
+
+  // Atomically raises maxval to val if val is larger than the stored value.
+  // Returns the value of maxval last observed by this call (the value before this call's update, if it made one).
+  template<typename T>
+  NETGEN_INLINE T AtomicMax( T & maxval, T val )
+  {
+      std::atomic<T> & amaxval = AsAtomic(maxval);
+      T current = amaxval.load();
+      // Only attempt the exchange while val would actually raise the stored value, this avoids needless writes
+      // to the shared location. A failed compare_exchange_weak reloads current, so the test is repeated on retry.
+      while (current < val && !amaxval.compare_exchange_weak(current, val))
+          ;
+      return current;
+  }
+
 } // namespace ngcore

 #endif // NETGEN_CORE_UTILS_HPP
--- a/tests/catch/CMakeLists.txt
+++ b/tests/catch/CMakeLists.txt
@ -27,6 +27,7 @@ endmacro()

 add_unit_test(archive archive.cpp)
 add_unit_test(symboltable symboltable.cpp)
+add_unit_test(utils utils.cpp)
 add_unit_test(version version.cpp)

 endif(ENABLE_UNIT_TESTS)
--- a/tests/catch/utils.cpp
+++ b/tests/catch/utils.cpp
@ -0,0 +1,67 @@
+
+#include "catch.hpp"
+#include <core/ngcore.hpp>
+using namespace ngcore;
+using namespace std;
+
+
+long shuffle(long N, long i) {
+    // Maps i to (N/2 + 101*i) mod N; the multiplication with a prime number scatters the values
+    // to force many updates of min, max
+    constexpr long prime = 101;
+    const long offset = N/2;
+    const long scaled = i*prime;
+    return (offset + scaled) % N;
+}
+
+// Checks AtomicMin, AtomicMax, AtomicAdd and AsAtomic inside ParallelFor loops,
+// with the task manager asked to use n_threads threads.
+void testThreading(int n_threads)
+{
+  TaskManager::SetNumThreads(n_threads);
+  // Keep the value returned by EnterTaskManager, it is handed back to ExitTaskManager at the end
+  n_threads = EnterTaskManager();
+
+  constexpr long N = 100000;
+  // N*(N-1)/2 is about 5e9 and overflows where long has only 32 bits (e.g. 64-bit Windows),
+  // so the integer sum and its expected value use long long
+  constexpr long long expected_sum = static_cast<long long>(N)*(N-1)/2;
+
+
+  // NOTE(review): a failing REQUIRE leaves this function early, ExitTaskManager below is then skipped
+  SECTION( "atomic operations" ) {
+      long i_min = 2*N;
+      long i_max = 0;
+      long long i_sum = 0;
+
+      double d_min = 1e100;
+      double d_max = 0.0;
+      double d_sum = 0.0;
+
+      // 101 and N are coprime, so shuffle(N, .) hits every value in 0..N-1 and min/max are 0 and N-1
+      ParallelFor( Range(N), [&] (long i) {
+          AtomicMin(i_min, shuffle(N,i));
+      });
+      REQUIRE( i_min==0 );
+
+      ParallelFor( Range(N), [&] (long i) {
+          AtomicMax(i_max, shuffle(N,i));
+      });
+      REQUIRE( i_max==N-1 );
+
+      ParallelFor( Range(N), [&] (long i) {
+          AsAtomic(i_sum) += i;
+      });
+      REQUIRE( i_sum==expected_sum );
+
+      // The index is taken as long and converted explicitly, so shuffle never receives a double
+      ParallelFor( Range(N), [&] (long i) {
+          AtomicMin(d_min, static_cast<double>(shuffle(N,i)));
+      });
+      REQUIRE( d_min==0 );
+
+      ParallelFor( Range(N), [&] (long i) {
+          AtomicMax(d_max, static_cast<double>(shuffle(N,i)));
+      });
+      REQUIRE( d_max==N-1 );
+
+      ParallelFor( Range(N), [&] (long i) {
+          AtomicAdd(d_sum, static_cast<double>(i));
+      });
+      // All partial sums are integers below 2^53, so the double sum is exact and == is safe here
+      REQUIRE( d_sum==expected_sum );
+
+  }
+  ExitTaskManager(n_threads);
+}
+
+TEST_CASE("Threading - 1 Thread") { testThreading(1); } // each case runs testThreading with the requested thread count
+TEST_CASE("Threading - 2 Thread") { testThreading(2); }
+TEST_CASE("Threading - 8 Thread") { testThreading(8); }