mirror of
https://github.com/NGSolve/netgen.git
synced 2024-12-26 05:50:32 +05:00
Merge branch 'utils_cleanup' into 'master'
Utility functions for multithreading. See merge request jschoeberl/netgen!192
This commit is contained in:
commit
cfa4777318
@ -1016,6 +1016,67 @@ public:
|
||||
|
||||
#endif // USE_NUMA
|
||||
|
||||
// Helper function to calculate coloring of a set of indices for parallel processing of independent elements/points/etc.
|
||||
// Assigns a color to each of colors.Size() elements, such that two elements with the same color don't share a common 'dof',
|
||||
// the mapping from element to dofs is provided by the function getDofs(int) -> iterable<int>
|
||||
//
|
||||
// Returns the number of used colors
|
||||
template <typename Tmask>
|
||||
int ComputeColoring( FlatArray<int> colors, size_t ndofs, Tmask const & getDofs)
|
||||
{
|
||||
static_assert(sizeof(unsigned int)==4, "Adapt type of mask array");
|
||||
auto n = colors.Size();
|
||||
|
||||
Array<unsigned int> mask(ndofs);
|
||||
|
||||
int colored_blocks = 0;
|
||||
|
||||
// We are coloring with 32 colors at once and use each bit to mask conflicts
|
||||
unsigned int check = 0;
|
||||
unsigned int checkbit = 0;
|
||||
|
||||
int current_color = 0;
|
||||
colors = -1;
|
||||
int maxcolor = 0;
|
||||
|
||||
while(colored_blocks<n)
|
||||
{
|
||||
mask = 0;
|
||||
for (auto i : Range(n) )
|
||||
{
|
||||
if(colors[i]>-1) continue;
|
||||
check = 0;
|
||||
const auto & dofs = getDofs(i);
|
||||
|
||||
// Check if adjacent dofs are already marked by current color
|
||||
for (auto dof : dofs)
|
||||
check|=mask[dof];
|
||||
|
||||
// Did we find a free color?
|
||||
if(check != 0xFFFFFFFF)
|
||||
{
|
||||
checkbit = 1;
|
||||
int color = current_color;
|
||||
// find the actual color, which is free (out of 32)
|
||||
while (check & checkbit)
|
||||
{
|
||||
color++;
|
||||
checkbit *= 2;
|
||||
}
|
||||
colors[i] = color;
|
||||
maxcolor = color > maxcolor ? color : maxcolor;
|
||||
colored_blocks++;
|
||||
// mask all adjacent dofs with the found color
|
||||
for (auto dof : dofs)
|
||||
mask[dof] |= checkbit;
|
||||
}
|
||||
}
|
||||
current_color+=32;
|
||||
}
|
||||
return maxcolor+1;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef NETGEN_CORE_UTILS_HPP
|
||||
#define NETGEN_CORE_UTILS_HPP
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
@ -119,6 +120,41 @@ namespace ngcore
|
||||
return std::equal(end.rbegin(), end.rend(), str.rbegin());
|
||||
}
|
||||
|
||||
// View an ordinary object as a std::atomic<T> so that atomic operations can be
// applied to storage that was not declared atomic.
//
// The returned reference aliases d itself (no copy is made), so d must stay
// alive for as long as the reference is used.
// NOTE(review): the cast relies on std::atomic<T> being layout-compatible with T,
// which the standard does not promise; the size check below rejects the cases
// where that assumption is visibly violated.
template<typename T>
NETGEN_INLINE std::atomic<T> & AsAtomic (T & d)
{
  // If std::atomic<T> carried extra storage (e.g. an embedded lock), accessing d
  // through the cast reference would touch memory beyond the object.
  static_assert(sizeof(std::atomic<T>) == sizeof(T),
                "AsAtomic requires std::atomic<T> to have the same size as T");
  return reinterpret_cast<std::atomic<T>&> (d);
}
|
||||
|
||||
NETGEN_INLINE double AtomicAdd( double & sum, double val )
|
||||
{
|
||||
std::atomic<double> & asum = AsAtomic(sum);
|
||||
double current = asum.load();
|
||||
while (!asum.compare_exchange_weak(current, current + val))
|
||||
;
|
||||
return current;
|
||||
}
|
||||
|
||||
// Atomically performs minval = min(minval, val) on a plain object of type T.
// Returns the value observed in minval before the update (which equals the
// stored value when no update was necessary).
template<typename T>
NETGEN_INLINE T AtomicMin( T & minval, T val )
{
  std::atomic<T> & aminval = AsAtomic(minval);
  T current = aminval.load();

  // Only attempt an exchange while val would actually lower the stored value.
  // This avoids rewriting an unchanged value (needless contention on the
  // variable). A failed exchange reloads 'current', so the condition is
  // re-evaluated against the latest value.
  while (val < current && !aminval.compare_exchange_weak(current, val))
    ;
  return current;
}
|
||||
|
||||
// Atomically performs maxval = max(maxval, val) on a plain object of type T.
// Returns the value observed in maxval before the update (which equals the
// stored value when no update was necessary).
template<typename T>
NETGEN_INLINE T AtomicMax( T & maxval, T val )
{
  std::atomic<T> & amaxval = AsAtomic(maxval);
  T current = amaxval.load();

  // Only attempt an exchange while val would actually raise the stored value.
  // This avoids rewriting an unchanged value (needless contention on the
  // variable). A failed exchange reloads 'current', so the condition is
  // re-evaluated against the latest value.
  while (current < val && !amaxval.compare_exchange_weak(current, val))
    ;
  return current;
}
|
||||
|
||||
} // namespace ngcore
|
||||
|
||||
#endif // NETGEN_CORE_UTILS_HPP
|
||||
|
@ -27,6 +27,7 @@ endmacro()
|
||||
|
||||
add_unit_test(archive archive.cpp)
|
||||
add_unit_test(symboltable symboltable.cpp)
|
||||
add_unit_test(utils utils.cpp)
|
||||
add_unit_test(version version.cpp)
|
||||
|
||||
endif(ENABLE_UNIT_TESTS)
|
||||
|
67
tests/catch/utils.cpp
Normal file
67
tests/catch/utils.cpp
Normal file
@ -0,0 +1,67 @@
|
||||
|
||||
#include "catch.hpp"
|
||||
#include <core/ngcore.hpp>
|
||||
using namespace ngcore;
|
||||
using namespace std;
|
||||
|
||||
|
||||
// Maps index i to a scrambled position in [0, N) for non-negative i:
// start at N/2, advance by a prime stride per index and wrap around modulo N.
// The scrambled order is meant to trigger many updates of the running min/max.
long shuffle(long N, long i) {
  constexpr long prime_stride = 101;
  const long start = N/2;
  const long unwrapped = start + i*prime_stride;
  return unwrapped % N;
}
|
||||
|
||||
void testThreading(int n_threads)
|
||||
{
|
||||
TaskManager::SetNumThreads(n_threads);
|
||||
n_threads = EnterTaskManager();
|
||||
|
||||
constexpr long N = 100000;
|
||||
|
||||
|
||||
SECTION( "atomic operations" ) {
|
||||
long i_min = 2*N;
|
||||
long i_max = 0;
|
||||
long i_sum = 0;
|
||||
|
||||
double d_min = 1e100;
|
||||
double d_max = 0.0;
|
||||
double d_sum = 0.0;
|
||||
|
||||
ParallelFor( Range(N), [&] (long i) {
|
||||
AtomicMin(i_min, shuffle(N,i));
|
||||
});
|
||||
REQUIRE( i_min==0 );
|
||||
|
||||
ParallelFor( Range(N), [&] (long i) {
|
||||
AtomicMax(i_max, shuffle(N,i));
|
||||
});
|
||||
REQUIRE( i_max==N-1 );
|
||||
|
||||
ParallelFor( Range(N), [&] (long i) {
|
||||
AsAtomic(i_sum) += i;
|
||||
});
|
||||
REQUIRE( i_sum==N*(N-1)/2 );
|
||||
|
||||
ParallelFor( Range(N), [&] (double i) {
|
||||
AtomicMin(d_min, static_cast<double>(shuffle(N,i)));
|
||||
});
|
||||
REQUIRE( d_min==0 );
|
||||
|
||||
ParallelFor( Range(N), [&] (double i) {
|
||||
AtomicMax(d_max, static_cast<double>(shuffle(N,i)));
|
||||
});
|
||||
REQUIRE( d_max==N-1 );
|
||||
|
||||
ParallelFor( Range(N), [&] (double i) {
|
||||
AtomicAdd(d_sum, i);
|
||||
});
|
||||
REQUIRE( d_sum==N*(N-1)/2 );
|
||||
|
||||
}
|
||||
ExitTaskManager(n_threads);
|
||||
}
|
||||
|
||||
// Register the threading checks once for each requested thread count (1, 2, 8).
TEST_CASE("Threading - 1 Thread")
{
  testThreading(1);
}

TEST_CASE("Threading - 2 Thread")
{
  testThreading(2);
}

TEST_CASE("Threading - 8 Thread")
{
  testThreading(8);
}
|
Loading…
Reference in New Issue
Block a user