Merge branch 'utils_cleanup' into 'master'

Utility functions for multithreading

See merge request jschoeberl/netgen!192
This commit is contained in:
Joachim Schöberl 2019-08-08 15:08:33 +00:00
commit cfa4777318
4 changed files with 165 additions and 0 deletions

View File

@ -1016,6 +1016,67 @@ public:
#endif // USE_NUMA #endif // USE_NUMA
// Colors a set of items (elements/points/etc.) for parallel processing: two items that
// receive the same color never share a common 'dof'.
//
//   colors  - one entry per item; every entry is overwritten with the color of that item
//   ndofs   - number of entries allocated for the per-dof conflict mask
//   getDofs - callable mapping an item index to an iterable of its dof indices
//
// Returns the highest assigned color plus one (1 when there is nothing to color).
template <typename Tmask>
int ComputeColoring( FlatArray<int> colors, size_t ndofs, Tmask const & getDofs)
{
  static_assert(sizeof(unsigned int)==4, "Adapt type of mask array");

  auto num_items = colors.Size();
  Array<unsigned int> dof_mask(ndofs);

  int num_colored = 0;
  int highest_color = 0;
  // Colors are handed out in batches of 32: within one sweep, bit k of a mask entry
  // stands for color base_color+k being in use at that dof.
  int base_color = 0;

  colors = -1;

  while (num_colored < num_items)
  {
    dof_mask = 0;
    for (auto item : Range(num_items))
    {
      // already colored in an earlier sweep
      if (colors[item] >= 0)
        continue;

      const auto & item_dofs = getDofs(item);

      // collect every color of the current batch that is taken at one of the item's dofs
      unsigned int occupied = 0;
      for (auto dof : item_dofs)
        occupied |= dof_mask[dof];

      // all 32 colors of this batch conflict, the item has to wait for the next sweep
      if (occupied == 0xFFFFFFFF)
        continue;

      // lowest clear bit of 'occupied' is the free color
      unsigned int free_bit = 1;
      int chosen = base_color;
      for ( ; occupied & free_bit; free_bit <<= 1)
        ++chosen;

      colors[item] = chosen;
      if (chosen > highest_color)
        highest_color = chosen;
      ++num_colored;

      // reserve the chosen color at all dofs of this item
      for (auto dof : item_dofs)
        dof_mask[dof] |= free_bit;
    }
    base_color += 32;
  }

  return highest_color + 1;
}
} }

View File

@ -1,6 +1,7 @@
#ifndef NETGEN_CORE_UTILS_HPP #ifndef NETGEN_CORE_UTILS_HPP
#define NETGEN_CORE_UTILS_HPP #define NETGEN_CORE_UTILS_HPP
#include <atomic>
#include <chrono> #include <chrono>
#include <map> #include <map>
#include <ostream> #include <ostream>
@ -119,6 +120,41 @@ namespace ngcore
return std::equal(end.rbegin(), end.rend(), str.rbegin()); return std::equal(end.rbegin(), end.rend(), str.rbegin());
} }
// Reinterprets a plain object as a std::atomic<T> so that atomic operations can be
// applied to storage that was not declared atomic.
//
//   d - the object to access atomically
//
// Returns a reference to the same storage, typed as std::atomic<T>.
//
// The cast is only meaningful when std::atomic<T> occupies exactly the bytes of T;
// a padded or lock-carrying atomic would make the returned reference reach beyond d,
// so the size is verified at compile time.
template<typename T>
NETGEN_INLINE std::atomic<T> & AsAtomic (T & d)
{
  static_assert(sizeof(std::atomic<T>) == sizeof(T),
                "AsAtomic requires std::atomic<T> to have the same size as T");
  return reinterpret_cast<std::atomic<T>&> (d);
}
// Atomically adds val to sum using a compare-and-swap retry loop.
//
//   sum - accumulator, accessed through AsAtomic
//   val - value to add
//
// Returns the value sum held immediately before the successful update.
NETGEN_INLINE double AtomicAdd( double & sum, double val )
{
  std::atomic<double> & target = AsAtomic(sum);
  double observed = target.load();
  for (;;)
  {
    const double updated = observed + val;
    if (target.compare_exchange_weak(observed, updated))
      return observed;
    // the failed exchange refreshed 'observed' with the current value, try again
  }
}
// Atomically replaces minval by the smaller of minval and val using a
// compare-and-swap retry loop.
//
//   minval - running minimum, accessed through AsAtomic
//   val    - candidate value
//
// Returns the value minval held immediately before the successful exchange.
template<typename T>
NETGEN_INLINE T AtomicMin( T & minval, T val )
{
  std::atomic<T> & target = AsAtomic(minval);
  T observed = target.load();
  for (;;)
  {
    // same selection as std::min(observed, val)
    const T smaller = (val < observed) ? val : observed;
    if (target.compare_exchange_weak(observed, smaller))
      return observed;
    // the failed exchange refreshed 'observed' with the current value, try again
  }
}
// Atomically replaces maxval by the larger of maxval and val using a
// compare-and-swap retry loop.
//
//   maxval - running maximum, accessed through AsAtomic
//   val    - candidate value
//
// Returns the value maxval held immediately before the successful exchange.
template<typename T>
NETGEN_INLINE T AtomicMax( T & maxval, T val )
{
  std::atomic<T> & target = AsAtomic(maxval);
  T observed = target.load();
  for (;;)
  {
    // same selection as std::max(observed, val)
    const T larger = (observed < val) ? val : observed;
    if (target.compare_exchange_weak(observed, larger))
      return observed;
    // the failed exchange refreshed 'observed' with the current value, try again
  }
}
} // namespace ngcore } // namespace ngcore
#endif // NETGEN_CORE_UTILS_HPP #endif // NETGEN_CORE_UTILS_HPP

View File

@ -27,6 +27,7 @@ endmacro()
add_unit_test(archive archive.cpp) add_unit_test(archive archive.cpp)
add_unit_test(symboltable symboltable.cpp) add_unit_test(symboltable symboltable.cpp)
add_unit_test(utils utils.cpp)
add_unit_test(version version.cpp) add_unit_test(version version.cpp)
endif(ENABLE_UNIT_TESTS) endif(ENABLE_UNIT_TESTS)

67
tests/catch/utils.cpp Normal file
View File

@ -0,0 +1,67 @@
#include "catch.hpp"
#include <core/ngcore.hpp>
using namespace ngcore;
using namespace std;
// Maps index i to (N/2 + 101*i) mod N.
// Stepping through the indices with a prime stride scatters the results so that the
// running min/max in the tests below gets updated many times.
long shuffle(long N, long i) {
  constexpr long prime_stride = 101;
  const long start = N/2;
  const long scattered = start + i*prime_stride;
  return scattered % N;
}
// Exercises AtomicMin, AtomicMax, AtomicAdd and AsAtomic from parallel loops and checks
// the results against the values known for the index range [0, N).
//
//   n_threads - number of threads requested from the TaskManager
void testThreading(int n_threads)
{
  TaskManager::SetNumThreads(n_threads);
  // keep the value returned by EnterTaskManager, it is handed back to ExitTaskManager below
  n_threads = EnterTaskManager();

  constexpr long N = 100000;
  // 0 + 1 + ... + (N-1) = 4999950000 does not fit into a 32-bit long, so the expected
  // value and the integer accumulator are kept in long long
  constexpr long long expected_sum = static_cast<long long>(N)*(N-1)/2;

  SECTION( "atomic operations" ) {
    long i_min = 2*N;
    long i_max = 0;
    long long i_sum = 0;

    double d_min = 1e100;
    double d_max = 0.0;
    double d_sum = 0.0;

    // integer minimum / maximum / sum
    ParallelFor( Range(N), [&] (long i) {
      AtomicMin(i_min, shuffle(N,i));
    });
    REQUIRE( i_min==0 );

    ParallelFor( Range(N), [&] (long i) {
      AtomicMax(i_max, shuffle(N,i));
    });
    REQUIRE( i_max==N-1 );

    ParallelFor( Range(N), [&] (long i) {
      AsAtomic(i_sum) += i;
    });
    REQUIRE( i_sum==expected_sum );

    // floating point minimum / maximum / sum; all values involved are integers far
    // below 2^53, so the comparisons with == are exact
    ParallelFor( Range(N), [&] (long i) {
      AtomicMin(d_min, static_cast<double>(shuffle(N,i)));
    });
    REQUIRE( d_min==0 );

    ParallelFor( Range(N), [&] (long i) {
      AtomicMax(d_max, static_cast<double>(shuffle(N,i)));
    });
    REQUIRE( d_max==N-1 );

    ParallelFor( Range(N), [&] (long i) {
      AtomicAdd(d_sum, static_cast<double>(i));
    });
    REQUIRE( d_sum==static_cast<double>(expected_sum) );
  }

  ExitTaskManager(n_threads);
}
// Run the same threading checks with 1, 2 and 8 requested threads.
TEST_CASE("Threading - 1 Thread")
{
  testThreading(1);
}

TEST_CASE("Threading - 2 Thread")
{
  testThreading(2);
}

TEST_CASE("Threading - 8 Thread")
{
  testThreading(8);
}