Merge branch 'utils_cleanup' into 'master'

Utility functions for multithreading. See merge request jschoeberl/netgen!192
2025-04-29 11:00:49 +05:00 · 2019-08-08 15:08:33 +00:00 · 2019-08-08 15:08:33 +00:00 · cfa4777318
commit cfa4777318
parent e8960ebae1 a99ea4ae85
4 changed files with 165 additions and 0 deletions
--- a/libsrc/core/taskmanager.hpp
+++ b/libsrc/core/taskmanager.hpp
@ -1016,6 +1016,67 @@ public:
 #endif // USE_NUMA
  //   Helper function to calculate coloring of a set of indices for parallel processing of independent elements/points/etc.
  //   Assigns a color to each of colors.Size() elements, such that two elements with the same color don't share a common 'dof',
  //   the mapping from element to dofs is provided by the function getDofs(int) -> iterable<int>
  //
  //   colors  : output; every entry is first reset to -1 and then overwritten with the color of that element
  //   ndofs   : size of the per-dof mask array; each dof delivered by getDofs is used as an index into it
  //   getDofs : callable taking an element index and returning an iterable of dof indices
  //
  //   Returns the number of used colors (0 if colors is empty)
  template <typename Tmask>
  int ComputeColoring( FlatArray<int> colors, size_t ndofs, Tmask const & getDofs)
  {
    static_assert(sizeof(unsigned int)==4, "Adapt type of mask array");
    auto n = colors.Size();

    // One 32 bit word per dof: bit k is set when color (current_color+k) is already taken at that dof
    Array<unsigned int> mask(ndofs);

    // Same type as n, so the loop condition below does not mix signed and unsigned operands
    decltype(n) colored_blocks = 0;

    int current_color = 0;
    int num_colors = 0;
    colors = -1;

    // We are coloring with 32 colors at once and use each bit to mask conflicts
    while(colored_blocks<n)
    {
        mask = 0;
        for (auto i : Range(n) )
        {
            // already colored in an earlier round
            if(colors[i]>-1) continue;

            const auto & dofs = getDofs(i);

            // Collect the colors of this round that are already used by adjacent dofs
            unsigned int check = 0;
            for (auto dof : dofs)
                check|=mask[dof];

            // All 32 colors of this round are taken -> retry this element in the next round
            if(check == 0xFFFFFFFF) continue;

            // find the actual color, which is free (out of 32)
            unsigned int checkbit = 1;
            int color = current_color;
            while (check & checkbit)
            {
                color++;
                checkbit *= 2;
            }

            colors[i] = color;
            if(color >= num_colors)
                num_colors = color+1;
            colored_blocks++;

            // mask all adjacent dofs with the found color
            for (auto dof : dofs)
                mask[dof] |= checkbit;
        }
        current_color+=32;
    }
    return num_colors;
  }
 }
--- a/libsrc/core/utils.hpp
+++ b/libsrc/core/utils.hpp
@ -1,6 +1,7 @@
 #ifndef NETGEN_CORE_UTILS_HPP
 #define NETGEN_CORE_UTILS_HPP
 #include <atomic>
 #include <chrono>
 #include <map>
 #include <ostream>
@ -119,6 +120,41 @@ namespace ngcore
    return std::equal(end.rbegin(), end.rend(), str.rbegin());
  }
  // Views an ordinary object as a std::atomic<T> so that atomic operations can be applied to it in place.
  // The returned reference aliases d, no copy is made.
  // The cast only makes sense if std::atomic<T> has the same size as T, which is checked at compile time.
  template<typename T>
  NETGEN_INLINE std::atomic<T> & AsAtomic (T & d)
  {
    static_assert(sizeof(std::atomic<T>) == sizeof(T),
                  "AsAtomic requires std::atomic<T> to have the same size as T");
    return reinterpret_cast<std::atomic<T>&> (d);
  }
  // Atomically adds val to sum using a compare-exchange loop on the atomic view of sum.
  // Returns the value sum held immediately before the successful update.
  NETGEN_INLINE double AtomicAdd( double & sum, double val )
  {
      auto & shared = AsAtomic(sum);
      double seen = shared.load();
      for (;;)
      {
          // a failed exchange stores the value actually found into `seen`,
          // so the new total is recomputed from fresh data on the next pass
          if (shared.compare_exchange_weak(seen, seen + val))
              return seen;
      }
  }
  // Atomically replaces minval by std::min(minval, val) using a compare-exchange loop.
  // Returns the value minval held immediately before the successful exchange.
  template<typename T>
  NETGEN_INLINE T AtomicMin( T & minval, T val )
  {
      auto & shared = AsAtomic(minval);
      T seen = shared.load();
      for (;;)
      {
          // a failed exchange refreshes `seen`, so the candidate is rebuilt on the next pass
          const T candidate = std::min(seen, val);
          if (shared.compare_exchange_weak(seen, candidate))
              return seen;
      }
  }
  // Atomically replaces maxval by std::max(maxval, val) using a compare-exchange loop.
  // Returns the value maxval held immediately before the successful exchange.
  template<typename T>
  NETGEN_INLINE T AtomicMax( T & maxval, T val )
  {
      auto & shared = AsAtomic(maxval);
      T seen = shared.load();
      for (;;)
      {
          // a failed exchange refreshes `seen`, so the candidate is rebuilt on the next pass
          const T candidate = std::max(seen, val);
          if (shared.compare_exchange_weak(seen, candidate))
              return seen;
      }
  }
 } // namespace ngcore
 #endif // NETGEN_CORE_UTILS_HPP
--- a/tests/catch/CMakeLists.txt
+++ b/tests/catch/CMakeLists.txt
@ -27,6 +27,7 @@ endmacro()
 # One add_unit_test(<name> <source>) call per test source file.
 # NOTE(review): add_unit_test is defined outside this excerpt; presumably it builds and registers one test per call - confirm.
 add_unit_test(archive archive.cpp)
 add_unit_test(symboltable symboltable.cpp)
 add_unit_test(utils utils.cpp)
 add_unit_test(version version.cpp)
 endif(ENABLE_UNIT_TESTS)
--- a/tests/catch/utils.cpp
+++ b/tests/catch/utils.cpp
@ -0,0 +1,67 @@
 #include "catch.hpp"
 #include <core/ngcore.hpp>
 using namespace ngcore;
 using namespace std;
 long shuffle(long N, long i) {
    // Scramble i into the range of the % N result: start at N/2 and advance by a prime stride per index,
    // so that consecutive indices land far apart and min/max get updated many times.
    constexpr long stride = 101;
    const long start = N/2;
    const long scrambled = start + stride*i;
    return scrambled % N;
 }
 // Checks AtomicMin, AtomicMax, AtomicAdd and AsAtomic from inside ParallelFor loops,
 // after requesting n_threads threads from the TaskManager.
 // The value returned by EnterTaskManager is kept and passed to ExitTaskManager at the end.
 // NOTE(review): if a REQUIRE aborts the test early, ExitTaskManager is presumably skipped - confirm whether that matters.
 void testThreading(int n_threads)
 {
  TaskManager::SetNumThreads(n_threads);
  n_threads = EnterTaskManager();
  constexpr long N = 100000;
  // 0 + 1 + ... + (N-1). This is about 5e9 and does not fit into a 32 bit long,
  // so the reference value and the integer accumulator below use long long.
  constexpr long long expected_sum = static_cast<long long>(N)*(N-1)/2;
  SECTION( "atomic operations" ) {
      // start values are chosen such that the first update already changes them
      long i_min = 2*N;
      long i_max = 0;
      long long i_sum = 0;
      double d_min = 1e100;
      double d_max = 0.0;
      double d_sum = 0.0;
      ParallelFor( Range(N), [&] (long i) {
          AtomicMin(i_min, shuffle(N,i));
      });
      REQUIRE( i_min==0 );
      ParallelFor( Range(N), [&] (long i) {
          AtomicMax(i_max, shuffle(N,i));
      });
      REQUIRE( i_max==N-1 );
      ParallelFor( Range(N), [&] (long i) {
          AsAtomic(i_sum) += i;
      });
      REQUIRE( i_sum==expected_sum );
      ParallelFor( Range(N), [&] (double i) {
          AtomicMin(d_min, static_cast<double>(shuffle(N,i)));
      });
      REQUIRE( d_min==0 );
      ParallelFor( Range(N), [&] (double i) {
          AtomicMax(d_max, static_cast<double>(shuffle(N,i)));
      });
      REQUIRE( d_max==N-1 );
      ParallelFor( Range(N), [&] (double i) {
          AtomicAdd(d_sum, i);
      });
      // all summands and partial sums are integers far below 2^53, so the double sum is exact
      REQUIRE( d_sum==expected_sum );
  }
  ExitTaskManager(n_threads);
 }
 // Run the same checks with 1, 2 and 8 requested threads.
 TEST_CASE("Threading - 1 Thread") { testThreading(1); }
 TEST_CASE("Threading - 2 Thread") { testThreading(2); }
 TEST_CASE("Threading - 8 Thread") { testThreading(8); }