From dca5a01e0477dce3829828c73dcc79bf8a1fa4f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20Sch=C3=B6berl?= Date: Thu, 4 Jan 2018 13:00:01 +0100 Subject: [PATCH] more parallel in topology --- libsrc/general/parthreads.hpp | 6 ++++++ libsrc/general/table.hpp | 6 ++++++ libsrc/include/mystdlib.h | 2 +- libsrc/meshing/clusters.cpp | 5 +++-- libsrc/meshing/clusters.hpp | 2 +- libsrc/meshing/meshclass.cpp | 2 +- libsrc/meshing/topology.cpp | 23 ++++++++++++++--------- 7 files changed, 32 insertions(+), 14 deletions(-) diff --git a/libsrc/general/parthreads.hpp b/libsrc/general/parthreads.hpp index e799b8ab..05521df1 100644 --- a/libsrc/general/parthreads.hpp +++ b/libsrc/general/parthreads.hpp @@ -95,6 +95,12 @@ void ParallelFor( int first, int next, const TFunc & f ) } + + template + inline atomic & AsAtomic (T & d) + { + return reinterpret_cast&> (d); + } typedef void (*TaskManager)(std::function); typedef void (*Tracer)(string, bool); // false .. start, true .. stop diff --git a/libsrc/general/table.hpp b/libsrc/general/table.hpp index e3ac08c4..af6ea49e 100644 --- a/libsrc/general/table.hpp +++ b/libsrc/general/table.hpp @@ -160,6 +160,12 @@ public: data[i-BASE].size++; } + inline void ParallelAdd (int i, const T & acont) + { + auto oldval = AsAtomic (data[i-BASE].size)++; + ((T*)data[i-BASE].col)[oldval] = acont; + } + /// Inserts element acont into row i. 1-based. Does not test if already used, assumes to have mem inline void AddSave1 (int i, const T & acont) { diff --git a/libsrc/include/mystdlib.h b/libsrc/include/mystdlib.h index b4a049a7..b55ba3da 100644 --- a/libsrc/include/mystdlib.h +++ b/libsrc/include/mystdlib.h @@ -18,7 +18,7 @@ #include #include #include - +#include #include #include diff --git a/libsrc/meshing/clusters.cpp b/libsrc/meshing/clusters.cpp index 478013be..0c2dd7bb 100644 --- a/libsrc/meshing/clusters.cpp +++ b/libsrc/meshing/clusters.cpp @@ -16,7 +16,7 @@ namespace netgen ; } - void AnisotropicClusters :: Update(TaskManager tm) + void AnisotropicClusters :: Update(TaskManager tm, Tracer tracer) { static int timer = NgProfiler::CreateTimer ("clusters"); static int timer1 = NgProfiler::CreateTimer ("clusters1"); @@ -212,7 +212,7 @@ namespace netgen do { - + (*tracer) ("update cluster, identify", false); cnt++; changed = 0; @@ -332,6 +332,7 @@ namespace netgen } */ } + (*tracer) ("update cluster, identify", true); } while (changed); NgProfiler::StopTimer(timer3); diff --git a/libsrc/meshing/clusters.hpp b/libsrc/meshing/clusters.hpp index 1a8fce99..2cd701b4 100644 --- a/libsrc/meshing/clusters.hpp +++ b/libsrc/meshing/clusters.hpp @@ -27,7 +27,7 @@ public: AnisotropicClusters (const Mesh & amesh); ~AnisotropicClusters(); - void Update(TaskManager tm = &DummyTaskManager); + void Update(TaskManager tm = &DummyTaskManager, Tracer trace = &DummyTracer); int GetVertexRepresentant (int vnr) const { return cluster_reps.Get(vnr); } diff --git a/libsrc/meshing/meshclass.cpp b/libsrc/meshing/meshclass.cpp index 01604766..fa839789 100644 --- a/libsrc/meshing/meshclass.cpp +++ b/libsrc/meshing/meshclass.cpp @@ -5773,7 +5773,7 @@ namespace netgen { topology.Update(tm, tracer); (*tracer)("call update clusters", false); - clusters->Update(tm); + clusters->Update(tm, tracer); (*tracer)("call update clusters", true); #ifdef PARALLEL if (paralleltop) diff --git a/libsrc/meshing/topology.cpp b/libsrc/meshing/topology.cpp index 690a5148..752d16a0 100644 --- a/libsrc/meshing/topology.cpp +++ b/libsrc/meshing/topology.cpp @@ -1,17 +1,9 @@ #include #include "meshing.hpp" -#include namespace netgen { - template - inline atomic & AsAtomic (T & d) - { - return reinterpret_cast&> (d); - } - - template void QuickSortRec (FlatArray data, @@ -397,13 +389,26 @@ namespace netgen }); vert2element = TABLE (cnt); + /* for (ElementIndex ei = 0; ei < ne; ei++) { const Element & el = (*mesh)[ei]; for (int j = 0; j < el.GetNV(); j++) vert2element.AddSave (el[j], ei); } - + */ + ParallelForRange + (tm, ne, + [&] (size_t begin, size_t end) + { + for (ElementIndex ei = begin; ei < end; ei++) + { + const Element & el = (*mesh)[ei]; + for (int j = 0; j < el.GetNV(); j++) + vert2element.ParallelAdd (el[j], ei); + } + }); + cnt = 0; /* for (SurfaceElementIndex sei = 0; sei < nse; sei++)