From 1a93fb3fa5da003be01ed72c547dd97337b97f54 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Wed, 18 Nov 2020 20:20:35 +0100 Subject: [PATCH 01/12] first attempt on memory tracing --- libsrc/core/array.hpp | 18 ++++++ libsrc/core/paje_trace.cpp | 29 +++++++++- libsrc/core/paje_trace.hpp | 38 ++++++++++++ libsrc/core/profiler.cpp | 2 + libsrc/core/profiler.hpp | 87 ++++++++++++++++++++++++++++ libsrc/core/python_ngcore_export.cpp | 4 +- libsrc/meshing/meshclass.hpp | 9 +++ 7 files changed, 183 insertions(+), 4 deletions(-) diff --git a/libsrc/core/array.hpp b/libsrc/core/array.hpp index 864fd454..350eb58e 100644 --- a/libsrc/core/array.hpp +++ b/libsrc/core/array.hpp @@ -11,6 +11,7 @@ #include "archive.hpp" #include "exception.hpp" #include "localheap.hpp" +#include "profiler.hpp" #include "utils.hpp" namespace ngcore @@ -654,6 +655,8 @@ namespace ngcore /// that's the data we have to delete, nullptr for not owning the memory T * mem_to_delete; + int mem_tracing_id = 0; + using FlatArray::size; using FlatArray::data; using FlatArray::BASE; @@ -1038,6 +1041,18 @@ namespace ngcore ngcore::Swap (data, b.data); ngcore::Swap (allocsize, b.allocsize); ngcore::Swap (mem_to_delete, b.mem_to_delete); + ngcore::Swap (mem_tracing_id, b.mem_tracing_id); + } + + NETGEN_INLINE void SetMemoryTracing (int mem_id) + { + if(!mem_tracing_id && mem_id) + TraceMemoryAlloc(mem_id, sizeof(T)*allocsize); + + if(mem_tracing_id && !mem_id) + TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); + + mem_tracing_id = mem_id; } private: @@ -1053,6 +1068,8 @@ namespace ngcore { size_t nsize = 2 * allocsize; if (nsize < minsize) nsize = minsize; + + TraceMemoryAlloc(mem_tracing_id, sizeof(T)*nsize ); T * hdata = data; data = new T[nsize]; @@ -1069,6 +1086,7 @@ namespace ngcore for (size_t i = 0; i < mins; i++) data[i] = std::move(hdata[i]); #endif delete [] mem_to_delete; + TraceMemoryFree( mem_tracing_id, sizeof(T)*allocsize ); } mem_to_delete = data; diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index c58912a4..1c4843a6 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -36,6 +36,7 @@ namespace ngcore // increases trace by a factor of two bool PajeTrace::trace_thread_counter = false; bool PajeTrace::trace_threads = true; + bool PajeTrace::mem_tracing_enabled = true; PajeTrace :: PajeTrace(int anthreads, std::string aname) { @@ -62,6 +63,7 @@ namespace ngcore jobs.reserve(reserve_size); timer_events.reserve(reserve_size); + memory_events.reserve(1024*1024); // sync start time when running in parallel #ifdef PARALLEL @@ -72,6 +74,7 @@ namespace ngcore start_time = GetTimeCounter(); tracing_enabled = true; + mem_tracing_enabled = true; } PajeTrace :: ~PajeTrace() @@ -94,6 +97,9 @@ namespace ngcore for(auto & link : llink) link.time -= start_time; + for(auto & m : memory_events) + m.time -= start_time; + NgMPI_Comm comm(MPI_COMM_WORLD); if(comm.Size()==1) @@ -426,6 +432,7 @@ namespace ngcore const int container_type_thread = paje.DefineContainerType( container_type_task_manager, "Thread"); const int container_type_timer = container_type_thread; //paje.DefineContainerType( container_type_task_manager, "Timers"); const int container_type_jobs = paje.DefineContainerType( container_type_task_manager, "Jobs"); + const int container_type_memory = paje.DefineContainerType( container_type_task_manager, "Memory usage"); const int state_type_job = paje.DefineStateType( container_type_jobs, "Job" ); const int state_type_task = paje.DefineStateType( container_type_thread, "Task" ); @@ -433,12 +440,20 @@ namespace ngcore int variable_type_active_threads = 0; if(trace_thread_counter) - paje.DefineVariableType( container_type_jobs, "Active threads" ); + variable_type_active_threads = paje.DefineVariableType( container_type_jobs, "Active threads" ); const int container_task_manager = paje.CreateContainer( container_type_task_manager, 0, "The task manager" ); const int container_jobs = paje.CreateContainer( container_type_jobs, container_task_manager, "Jobs" ); - if(trace_thread_counter) - paje.SetVariable( 0, variable_type_active_threads, container_jobs, 0.0 ); + + int variable_type_memory = 0; + if(mem_tracing_enabled) + { + variable_type_memory = paje.DefineVariableType( container_type_task_manager, "Memory [MB]" ); + paje.SetVariable( 0, variable_type_memory, container_type_memory, 0.0 ); + } + + const int container_memory = paje.CreateContainer( container_type_memory, container_task_manager, "Memory" ); + int num_nodes = 1; //task_manager ? task_manager->GetNumNodes() : 1; std::vector thread_aliases; @@ -509,6 +524,14 @@ namespace ngcore paje.PopState( j.stop_time, state_type_job, container_jobs ); } + for(const auto & m : memory_events) + { + if(m.is_alloc) + paje.AddVariable( m.time, variable_type_memory, container_memory, 1.0*m.size / (1024*1024)); + else + paje.SubVariable( m.time, variable_type_memory, container_memory, 1.0*m.size / (1024*1024)); + } + std::set timer_ids; std::map timer_aliases; std::map timer_names; diff --git a/libsrc/core/paje_trace.hpp b/libsrc/core/paje_trace.hpp index 95c42d4a..84227fb4 100644 --- a/libsrc/core/paje_trace.hpp +++ b/libsrc/core/paje_trace.hpp @@ -23,6 +23,7 @@ namespace ngcore NGCORE_API static size_t max_tracefile_size; NGCORE_API static bool trace_thread_counter; NGCORE_API static bool trace_threads; + NGCORE_API static bool mem_tracing_enabled; bool tracing_enabled; TTimePoint start_time; @@ -35,6 +36,11 @@ namespace ngcore // be stopped if any thread reaches this number of events unsigned int max_num_events_per_thread; + static void SetTraceMemory( bool trace_memory ) + { + mem_tracing_enabled = trace_memory; + } + static void SetTraceThreads( bool atrace_threads ) { trace_threads = atrace_threads; @@ -96,10 +102,21 @@ namespace ngcore bool operator < (const ThreadLink & other) const { return time < other.time; } }; + struct MemoryEvent + { + TTimePoint time; + size_t size; + int region_id; + bool is_alloc; + + bool operator < (const MemoryEvent & other) const { return time < other.time; } + }; + std::vector > tasks; std::vector jobs; std::vector timer_events; std::vector > links; + std::vector memory_events; public: NGCORE_API void StopTracing(); @@ -129,6 +146,27 @@ namespace ngcore timer_events.push_back(TimerEvent{timer_id, GetTimeCounter(), false}); } + void AllocMemory(int id, size_t size) + { + if(!mem_tracing_enabled) return; + memory_events.push_back(MemoryEvent{GetTimeCounter(), size, id, true}); + } + + void FreeMemory(int id, size_t size) + { + if(!mem_tracing_enabled) return; + memory_events.push_back(MemoryEvent{GetTimeCounter(), size, id, false}); + } + + void ChangeMemory(int id, long long size) + { + if(size>0) + AllocMemory(id, size); + if(size<0) + FreeMemory(id, -size); + } + + NETGEN_INLINE int StartTask(int thread_id, int id, int id_type = Task::ID_NONE, int additional_value = -1) { if(!tracing_enabled) return -1; diff --git a/libsrc/core/profiler.cpp b/libsrc/core/profiler.cpp index 1190e6fe..73aad63d 100644 --- a/libsrc/core/profiler.cpp +++ b/libsrc/core/profiler.cpp @@ -113,5 +113,7 @@ namespace ngcore NgProfiler prof; // NOLINT + std::vector MemoryTracer::names{"root"}; + std::map< int, std::vector > MemoryTracer::tree; } // namespace ngcore diff --git a/libsrc/core/profiler.hpp b/libsrc/core/profiler.hpp index c16c242c..12233db1 100644 --- a/libsrc/core/profiler.hpp +++ b/libsrc/core/profiler.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "logging.hpp" @@ -299,6 +300,92 @@ namespace ngcore return tres; } + + class MemoryTracer + { + NGCORE_API static std::vector names; + static int GetId(std::string name) + { + int id = names.size(); + names.push_back(name); + if(id==10*NgProfiler::SIZE) + std::cerr << "Allocated " << id << " MemoryTracer objects" << std::endl; + return id; + } + + NGCORE_API static std::map< int, std::vector > tree; + + int id; + std::vector> tracks; + + public: + + MemoryTracer( std::string name ) + { + id = GetId(name); + } + + template + MemoryTracer( std::string name, TRest & ... rest ) + { + id = GetId(name); + Track(rest...); + } + + template + void Track( T1 & obj, std::string name, TRest & ... rest ) + { + Track(obj, name); + Track(rest...); + } + + template + void Track( T & obj, std::string name ) + { + int child_id = GetId(name); + tree[id].push_back(child_id); + obj.SetMemoryTracing(child_id); + tracks.push_back( [&obj] () { obj.SetMemoryTracing(0); } ); + } + + template + void Track( T & obj ) + { + auto & mt = obj.GetMemoryTracer(); + int child_id = mt.id; + tree[id].push_back(child_id); + } + + void StopTracking() + { + for(auto & f : tracks) + f(); + tracks.clear(); + } + + static std::string GetName(int id) + { + return names[id]; + } + + ~MemoryTracer() + { + StopTracking(); + } + }; + + NETGEN_INLINE void TraceMemoryAlloc( int mem_id, size_t size ) + { + if(mem_id && trace) + trace->AllocMemory(mem_id, size); + } + + NETGEN_INLINE void TraceMemoryFree( int mem_id, size_t size ) + { + if(mem_id && trace) + trace->FreeMemory(mem_id, size); + } + } // namespace ngcore // Helper macro to easily add multiple timers in a function for profiling diff --git a/libsrc/core/python_ngcore_export.cpp b/libsrc/core/python_ngcore_export.cpp index 4f93168e..08d41ef4 100644 --- a/libsrc/core/python_ngcore_export.cpp +++ b/libsrc/core/python_ngcore_export.cpp @@ -247,15 +247,17 @@ threads : int ; py::class_(m, "PajeTrace") - .def(py::init( [] (string filename, size_t size_mb, bool threads, bool thread_counter) + .def(py::init( [] (string filename, size_t size_mb, bool threads, bool thread_counter, bool memory) { PajeTrace::SetMaxTracefileSize(size_mb*1014*1024); PajeTrace::SetTraceThreads(threads); + PajeTrace::SetTraceMemory(memory); PajeTrace::SetTraceThreadCounter(thread_counter); trace = new PajeTrace(TaskManager::GetMaxThreads(), filename); return trace; }), py::arg("filename")="ng.trace", py::arg("size")=1000, py::arg("threads")=true, py::arg("thread_counter")=false, + py::arg("memory")=true, "size in Megabytes" ) .def("__enter__", [](PajeTrace & self) { }) diff --git a/libsrc/meshing/meshclass.hpp b/libsrc/meshing/meshclass.hpp index 1de31c97..aab4e87a 100644 --- a/libsrc/meshing/meshclass.hpp +++ b/libsrc/meshing/meshclass.hpp @@ -925,6 +925,15 @@ namespace netgen shared_ptr Mirror( netgen::Point<3> p, Vec<3> n ); + private: + MemoryTracer mem_tracer = {"Mesh", + points, "points", + segments, "segments", + surfelements, "surfelements", + volelements, "volelements" + }; + public: + const MemoryTracer & GetMemoryTracer() { return mem_tracer; } }; inline ostream& operator<<(ostream& ost, const Mesh& mesh) From f143995f27e140155d851636d176d9107cde3288 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Wed, 18 Nov 2020 21:45:00 +0100 Subject: [PATCH 02/12] clean up memory tracing --- libsrc/core/paje_trace.cpp | 10 +++++----- libsrc/core/profiler.hpp | 14 -------------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index 1c4843a6..23dcb154 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -446,14 +446,13 @@ namespace ngcore const int container_jobs = paje.CreateContainer( container_type_jobs, container_task_manager, "Jobs" ); int variable_type_memory = 0; + const int container_memory = paje.CreateContainer( container_type_memory, container_task_manager, "Memory" ); if(mem_tracing_enabled) { variable_type_memory = paje.DefineVariableType( container_type_task_manager, "Memory [MB]" ); - paje.SetVariable( 0, variable_type_memory, container_type_memory, 0.0 ); + paje.SetVariable( 0, variable_type_memory, container_memory, 0.0 ); } - const int container_memory = paje.CreateContainer( container_type_memory, container_task_manager, "Memory" ); - int num_nodes = 1; //task_manager ? task_manager->GetNumNodes() : 1; std::vector thread_aliases; @@ -526,10 +525,11 @@ namespace ngcore for(const auto & m : memory_events) { + double size = 1.0*m.size/(1024*1024); if(m.is_alloc) - paje.AddVariable( m.time, variable_type_memory, container_memory, 1.0*m.size / (1024*1024)); + paje.AddVariable( m.time, variable_type_memory, container_memory, size); else - paje.SubVariable( m.time, variable_type_memory, container_memory, 1.0*m.size / (1024*1024)); + paje.SubVariable( m.time, variable_type_memory, container_memory, size); } std::set timer_ids; diff --git a/libsrc/core/profiler.hpp b/libsrc/core/profiler.hpp index 12233db1..d3b06746 100644 --- a/libsrc/core/profiler.hpp +++ b/libsrc/core/profiler.hpp @@ -316,7 +316,6 @@ namespace ngcore NGCORE_API static std::map< int, std::vector > tree; int id; - std::vector> tracks; public: @@ -345,7 +344,6 @@ namespace ngcore int child_id = GetId(name); tree[id].push_back(child_id); obj.SetMemoryTracing(child_id); - tracks.push_back( [&obj] () { obj.SetMemoryTracing(0); } ); } template @@ -356,22 +354,10 @@ namespace ngcore tree[id].push_back(child_id); } - void StopTracking() - { - for(auto & f : tracks) - f(); - tracks.clear(); - } - static std::string GetName(int id) { return names[id]; } - - ~MemoryTracer() - { - StopTracking(); - } }; NETGEN_INLINE void TraceMemoryAlloc( int mem_id, size_t size ) From a17066a387b28b7ae277a3a82ed8adac5f425c93 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Thu, 19 Nov 2020 14:57:45 +0100 Subject: [PATCH 03/12] html chart for peak memory consumption, some Array tracing fixes --- libsrc/core/array.hpp | 12 +- libsrc/core/paje_trace.cpp | 277 ++++++++++++++++++++++++++---------- libsrc/core/paje_trace.hpp | 2 +- libsrc/core/profiler.hpp | 89 ++++++++++-- libsrc/core/taskmanager.cpp | 2 +- 5 files changed, 290 insertions(+), 92 deletions(-) diff --git a/libsrc/core/array.hpp b/libsrc/core/array.hpp index 350eb58e..db5d7561 100644 --- a/libsrc/core/array.hpp +++ b/libsrc/core/array.hpp @@ -701,6 +701,8 @@ namespace ngcore NETGEN_INLINE Array (Array && a2) { + TraceMemoryChange(mem_tracing_id, sizeof(T)*(a2.allocsize-allocsize)); + size = a2.size; data = a2.data; allocsize = a2.allocsize; @@ -772,6 +774,7 @@ namespace ngcore NETGEN_INLINE ~Array() { delete [] mem_to_delete; + TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); } // Only provide this function if T is archivable @@ -826,6 +829,7 @@ namespace ngcore NETGEN_INLINE const Array & Assign (size_t asize, LocalHeap & lh) { delete [] mem_to_delete; + TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); size = allocsize = asize; data = lh.Alloc (asize); mem_to_delete = nullptr; @@ -933,6 +937,7 @@ namespace ngcore NETGEN_INLINE void DeleteAll () { delete [] mem_to_delete; + TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); mem_to_delete = NULL; data = 0; size = allocsize = 0; @@ -964,6 +969,8 @@ namespace ngcore /// steal array NETGEN_INLINE Array & operator= (Array && a2) { + TraceMemoryChange(mem_tracing_id, sizeof(T)*(a2.allocsize-allocsize)); + ngcore::Swap (size, a2.size); ngcore::Swap (data, a2.data); ngcore::Swap (allocsize, a2.allocsize); @@ -1037,6 +1044,8 @@ namespace ngcore NETGEN_INLINE void Swap (Array & b) { + TraceMemoryChange(mem_tracing_id, sizeof(T)*(b.allocsize-allocsize)); + ngcore::Swap (size, b.size); ngcore::Swap (data, b.data); ngcore::Swap (allocsize, b.allocsize); @@ -1068,11 +1077,10 @@ namespace ngcore { size_t nsize = 2 * allocsize; if (nsize < minsize) nsize = minsize; - - TraceMemoryAlloc(mem_tracing_id, sizeof(T)*nsize ); T * hdata = data; data = new T[nsize]; + TraceMemoryAlloc(mem_tracing_id, sizeof(T)*nsize ); if (hdata) { diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index 23dcb154..fdf8a28b 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -243,7 +243,8 @@ namespace ngcore PajeFile( const std::string & filename) { - ctrace_stream = fopen (filename.c_str(),"w"); // NOLINT + std::string fname = filename + ".trace"; + ctrace_stream = fopen (fname.c_str(),"w"); // NOLINT fprintf(ctrace_stream, "%s", header ); // NOLINT alias_counter = 0; } @@ -809,24 +810,25 @@ namespace ngcore int id = 0; std::map children; double chart_size = 0.0; // time without children (the chart lib accumulates children sizes again) - double time = 0.0; - double min_time = 1e99; - double max_time = 0.0; - size_t calls = 0; + double size = 0.0; + double min_size = 1e99; + double max_size = 0.0; std::string name; + + size_t calls = 0; TTimePoint start_time = 0; }; - void PrintNode (const TreeNode &n, int &level, std::ofstream & f); - void PrintNode (const TreeNode &n, int &level, std::ofstream & f) + void PrintNode (const TreeNode &n, std::ofstream & f) { f << "{ name: \"" + n.name + "\""; f << ", calls: " << n.calls; f << ", size: " << n.chart_size; - f << ", time: " << n.time; - f << ", min: " << n.min_time; - f << ", max: " << n.max_time; - f << ", avg: " << n.time/n.calls; + f << ", value: " << n.size; + f << ", min: " << n.min_size; + f << ", max: " << n.max_size; + if(n.calls) + f << ", avg: " << n.size/n.calls; int size = n.children.size(); if(size>0) { @@ -834,7 +836,7 @@ namespace ngcore f << ", children: ["; for(auto & c : n.children) { - PrintNode(c.second, level, f); + PrintNode(c.second, f); if(++i + + + + +)CODE_"; + if(!time_or_memory) + f << "Maximum Memory Consumption\n"; + f << R"CODE_( + + +
+ + + +)CODE_" << std::endl; + + + } + + void WriteMemorySunburstHTML( std::vector & events, std::string filename ) + { + size_t mem_allocated; + size_t max_mem_allocated; + size_t imax_mem_allocated; + + const auto & names = MemoryTracer::GetNames(); + const auto & tree = MemoryTracer::GetTree(); + auto N = names.size(); + + Array mem_allocated_id(N); + mem_allocated_id = 0; + + // Find point with maximum memory allocation, check for missing allocs/frees + for(auto i : IntRange(events.size())) + { + const auto & ev = events[i]; + + if(ev.is_alloc) + { + mem_allocated += ev.size; + mem_allocated_id[ev.id] += ev.size; + if(mem_allocated > max_mem_allocated) + { + imax_mem_allocated = i; + max_mem_allocated = mem_allocated; + } + } + else + { + if(ev.size > mem_allocated) + std::cerr << "Error in memory tracer: have total allocated memory < 0" << std::endl; + if(ev.size > mem_allocated_id[ev.id]) + std::cerr << "Error in memory tracer: have allocated memory < 0 in tracer " << names[ev.id] << std::endl; + + mem_allocated -= ev.size; + mem_allocated_id[ev.id] -= ev.size; + } + } + + // reconstruct again the memory consumption after event imax_mem_allocated + mem_allocated_id = 0; + for(auto i : IntRange(imax_mem_allocated+1)) + { + const auto & ev = events[i]; + + if(ev.is_alloc) + mem_allocated_id[ev.id] += ev.size; + else + mem_allocated_id[ev.id] -= ev.size; + } + + TreeNode root; + root.name="all"; + + Array nodes(N); + nodes = nullptr; + + // find root nodes in memory tracer tree, i.e. they have no parents + Array parents(N); + parents = -1; + for( const auto & [iparent, children] : tree ) + for (auto child_id : children) + { + if(parents[child_id] != -1) + std::cerr << "Error in memory tracer: multiple parents found for " << names[child_id] << std::endl; + parents[child_id] = iparent; + } + + for(auto i : IntRange(1, N)) + { + TreeNode * parent = &root; + if(parents[i]!=-1) + parent = nodes[parents[i]]; + + auto & node = parent->children[i]; + nodes[i] = &node; + node.id = i; + node.chart_size = mem_allocated_id[i]; + node.size = mem_allocated_id[i]; + node.name = names[i]; + } + + for(auto i : IntRange(1, N)) + if(parents[N-i]==-1) + root.size += nodes[N-i]->size; + else + nodes[parents[N-i]]->size += nodes[N-i]->size; + + WriteSunburstHTML( root, filename, false ); + + } + void PajeTrace::WriteSunburstHTML( ) { std::vector events; @@ -884,10 +1063,10 @@ namespace ngcore std::sort (events.begin(), events.end()); - root.time = 1000.0*static_cast(stop_time) * seconds_per_tick; + root.size = 1000.0*static_cast(stop_time) * seconds_per_tick; root.calls = 1; - root.min_time = root.time; - root.max_time = root.time; + root.min_size = root.size; + root.max_size = root.size; for(auto & event : events) { @@ -904,7 +1083,7 @@ namespace ngcore if(need_init) { current->name = is_timer_event ? GetTimerName(id) : job_names[id]; - current->time = 0.0; + current->size = 0.0; current->id = id; } @@ -916,73 +1095,23 @@ namespace ngcore std::cout << "node stack empty!" << std::endl; break; } - double time = 1000.0*static_cast(event.time-current->start_time) * seconds_per_tick; - current->time += time; - current->chart_size += time; - current->min_time = std::min(current->min_time, time); - current->max_time = std::max(current->max_time, time); + double size = 1000.0*static_cast(event.time-current->start_time) * seconds_per_tick; + current->size += size; + current->chart_size += size; + current->min_size = std::min(current->min_size, size); + current->max_size = std::max(current->max_size, size); current->calls++; current = node_stack.back(); - current->chart_size -= time; + current->chart_size -= size; node_stack.pop_back(); } } root.chart_size = 0.0; - int level = 0; - std::ofstream f(tracefile_name+".html"); - f.precision(4); - f << R"CODE_( - - - - - - - -
- - - -)CODE_" << std::endl; + ngcore::WriteSunburstHTML( root, tracefile_name, true ); + WriteMemorySunburstHTML( memory_events, tracefile_name+"_memory" ); } } // namespace ngcore diff --git a/libsrc/core/paje_trace.hpp b/libsrc/core/paje_trace.hpp index 84227fb4..22fd0f50 100644 --- a/libsrc/core/paje_trace.hpp +++ b/libsrc/core/paje_trace.hpp @@ -106,7 +106,7 @@ namespace ngcore { TTimePoint time; size_t size; - int region_id; + int id; bool is_alloc; bool operator < (const MemoryEvent & other) const { return time < other.time; } diff --git a/libsrc/core/profiler.hpp b/libsrc/core/profiler.hpp index d3b06746..ab9785ea 100644 --- a/libsrc/core/profiler.hpp +++ b/libsrc/core/profiler.hpp @@ -300,10 +300,48 @@ namespace ngcore return tres; } + class MemoryTracer; + + namespace detail + { + //Type trait to check if a class implements a 'const MemoryTracer& GetMemoryTracer()' function + template + struct has_GetMemoryTracer + { + private: + template + static constexpr auto check(T2*) -> + typename std::is_same().GetMemoryTracer()),const MemoryTracer &>::type; + template + static constexpr std::false_type check(...); + using type = decltype(check(nullptr)); // NOLINT + public: + static constexpr bool value = type::value; + }; + + //Type trait to check if a class implements a 'void SetMemoryTacing(int)' function + template + struct has_SetMemoryTracing + { + private: + template + static constexpr auto check(T2*) -> + typename std::is_same().SetMemoryTracing(0)),void>::type; + template + static constexpr std::false_type check(...); + using type = decltype(check(nullptr)); // NOLINT + public: + static constexpr bool value = type::value; + }; + + + } // namespace detail class MemoryTracer { NGCORE_API static std::vector names; + NGCORE_API static std::map< int, std::vector > tree; + static int GetId(std::string name) { int id = names.size(); @@ -313,7 +351,6 @@ namespace ngcore return id; } - NGCORE_API static std::map< int, std::vector > tree; int id; @@ -332,32 +369,50 @@ namespace ngcore } template - void Track( T1 & obj, std::string name, TRest & ... rest ) + void Track( T1 & obj, std::string name, TRest & ... rest ) const { Track(obj, name); Track(rest...); } template - void Track( T & obj, std::string name ) + void Track( T & obj, std::string name ) const { - int child_id = GetId(name); - tree[id].push_back(child_id); - obj.SetMemoryTracing(child_id); - } - - template - void Track( T & obj ) - { - auto & mt = obj.GetMemoryTracer(); - int child_id = mt.id; - tree[id].push_back(child_id); + if constexpr(detail::has_SetMemoryTracing::value) + { + int child_id = GetId(name); + tree[id].push_back(child_id); + obj.SetMemoryTracing(child_id); + } + if constexpr(detail::has_GetMemoryTracer::value) + { + auto & mt = obj.GetMemoryTracer(); + int child_id = mt.id; + if(name!="") + names[mt.id] = name; + tree[id].push_back(child_id); + } } static std::string GetName(int id) { return names[id]; } + + std::string GetName() const + { + return names[id]; + } + + void SetName(std::string name) const + { + names[id] = name; + } + + + static const std::vector & GetNames() { return names; } + static const std::map> & GetTree() { return tree; } + }; NETGEN_INLINE void TraceMemoryAlloc( int mem_id, size_t size ) @@ -372,6 +427,12 @@ namespace ngcore trace->FreeMemory(mem_id, size); } + NETGEN_INLINE void TraceMemoryChange( int mem_id, long long size ) + { + if(mem_id && trace) + trace->ChangeMemory(mem_id, size); + } + } // namespace ngcore // Helper macro to easily add multiple timers in a function for profiling diff --git a/libsrc/core/taskmanager.cpp b/libsrc/core/taskmanager.cpp index be345321..a1049a1c 100644 --- a/libsrc/core/taskmanager.cpp +++ b/libsrc/core/taskmanager.cpp @@ -160,7 +160,7 @@ namespace ngcore static int cnt = 0; if (use_paje_trace) - trace = new PajeTrace(num_threads, "ng" + ToString(cnt++) + ".trace"); + trace = new PajeTrace(num_threads, "ng" + ToString(cnt++)); } From b00c56a012064e61dc9070cbd7eb63e6caaa5b81 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Thu, 19 Nov 2020 14:58:16 +0100 Subject: [PATCH 04/12] mem tracing - set name for tempmesh in delaunay --- libsrc/meshing/delaunay.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libsrc/meshing/delaunay.cpp b/libsrc/meshing/delaunay.cpp index adc64712..cd675ef3 100644 --- a/libsrc/meshing/delaunay.cpp +++ b/libsrc/meshing/delaunay.cpp @@ -772,6 +772,7 @@ namespace netgen // improve delaunay - mesh by swapping !!!! Mesh tempmesh; + tempmesh.GetMemoryTracer().SetName("delaunay-tempmesh"); for (auto & meshpoint : mesh.Points()) tempmesh.AddPoint (meshpoint); From 6f98123e98c098e7a512e283591d20692e7454ca Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Thu, 19 Nov 2020 16:16:39 +0100 Subject: [PATCH 05/12] mem tracing - use topological sorting, some fixes --- libsrc/core/paje_trace.cpp | 57 ++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index fdf8a28b..856e2214 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -931,15 +931,16 @@ namespace ngcore void WriteMemorySunburstHTML( std::vector & events, std::string filename ) { - size_t mem_allocated; - size_t max_mem_allocated; - size_t imax_mem_allocated; + size_t mem_allocated = 0; + size_t max_mem_allocated = 0; + size_t imax_mem_allocated = 0; const auto & names = MemoryTracer::GetNames(); const auto & tree = MemoryTracer::GetTree(); - auto N = names.size(); + size_t N = names.size(); - Array mem_allocated_id(N); + Array mem_allocated_id; + mem_allocated_id.SetSize(N); mem_allocated_id = 0; // Find point with maximum memory allocation, check for missing allocs/frees @@ -984,11 +985,16 @@ namespace ngcore TreeNode root; root.name="all"; - Array nodes(N); + Array nodes; + nodes.SetSize(N); nodes = nullptr; + Array sorting; // topological sorting (parents before children) + sorting.SetAllocSize(N); + ArrayMem stack; // find root nodes in memory tracer tree, i.e. they have no parents - Array parents(N); + Array parents; + parents.SetSize(N); parents = -1; for( const auto & [iparent, children] : tree ) for (auto child_id : children) @@ -999,10 +1005,29 @@ namespace ngcore } for(auto i : IntRange(1, N)) + if(parents[i]==-1) + { + sorting.Append(i); + if(tree.count(i)) + stack.Append(i); + } + + while(stack.Size()) { - TreeNode * parent = &root; - if(parents[i]!=-1) - parent = nodes[parents[i]]; + auto current = stack.Last(); + stack.DeleteLast(); + + for(const auto child : tree.at(current)) + { + sorting.Append(child); + if(tree.count(child)) + stack.Append(child); + } + } + + for(auto i : sorting) + { + TreeNode * parent = (parents[i]==-1) ? &root : nodes[parents[i]]; auto & node = parent->children[i]; nodes[i] = &node; @@ -1012,11 +1037,15 @@ namespace ngcore node.name = names[i]; } - for(auto i : IntRange(1, N)) - if(parents[N-i]==-1) - root.size += nodes[N-i]->size; + for(auto i_ : Range(sorting)) + { + // reverse topological order to accumulate total memory usage of all children + auto i = sorting[sorting.Size()-1-i_]; + if(parents[i]==-1) + root.size += nodes[i]->size; else - nodes[parents[N-i]]->size += nodes[N-i]->size; + nodes[parents[i]]->size += nodes[i]->size; + } WriteSunburstHTML( root, filename, false ); From f0152baacfdf79a1353e1a88fc21540fcfcc7903 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Thu, 19 Nov 2020 17:35:29 +0100 Subject: [PATCH 06/12] mem tracing - TraceMemorySwap helper function --- libsrc/core/array.hpp | 7 +++---- libsrc/core/profiler.hpp | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/libsrc/core/array.hpp b/libsrc/core/array.hpp index db5d7561..60132186 100644 --- a/libsrc/core/array.hpp +++ b/libsrc/core/array.hpp @@ -701,7 +701,7 @@ namespace ngcore NETGEN_INLINE Array (Array && a2) { - TraceMemoryChange(mem_tracing_id, sizeof(T)*(a2.allocsize-allocsize)); + TraceMemorySwap(mem_tracing_id, sizeof(T)*allocsize, a2.mem_tracing_id, sizeof(T)*a2.allocsize); size = a2.size; data = a2.data; @@ -969,7 +969,7 @@ namespace ngcore /// steal array NETGEN_INLINE Array & operator= (Array && a2) { - TraceMemoryChange(mem_tracing_id, sizeof(T)*(a2.allocsize-allocsize)); + TraceMemorySwap(mem_tracing_id, sizeof(T)*allocsize, a2.mem_tracing_id, sizeof(T)*a2.allocsize); ngcore::Swap (size, a2.size); ngcore::Swap (data, a2.data); @@ -1044,13 +1044,12 @@ namespace ngcore NETGEN_INLINE void Swap (Array & b) { - TraceMemoryChange(mem_tracing_id, sizeof(T)*(b.allocsize-allocsize)); + TraceMemorySwap(mem_tracing_id, sizeof(T)*allocsize, b.mem_tracing_id, sizeof(T)*b.allocsize); ngcore::Swap (size, b.size); ngcore::Swap (data, b.data); ngcore::Swap (allocsize, b.allocsize); ngcore::Swap (mem_to_delete, b.mem_to_delete); - ngcore::Swap (mem_tracing_id, b.mem_tracing_id); } NETGEN_INLINE void SetMemoryTracing (int mem_id) diff --git a/libsrc/core/profiler.hpp b/libsrc/core/profiler.hpp index ab9785ea..cb07d053 100644 --- a/libsrc/core/profiler.hpp +++ b/libsrc/core/profiler.hpp @@ -433,6 +433,28 @@ namespace ngcore trace->ChangeMemory(mem_id, size); } + NETGEN_INLINE void TraceMemorySwap( int mem_id, size_t size, int mem_id2, size_t size2 ) + { + if(!trace || (mem_id==0 && mem_id2==0)) + return; + if(mem_id == 0) + return trace->ChangeMemory(mem_id2, size-size2); + if(mem_id2 == 0) + return trace->ChangeMemory(mem_id, size2-size); + + // first decrease memory, otherwise have artificial/wrong high peak memory usage + if(sizeChangeMemory(mem_id2, size-size2); + trace->ChangeMemory(mem_id, size2-size); + } + else + { + trace->ChangeMemory(mem_id, size2-size); + trace->ChangeMemory(mem_id2, size-size2); + } + } + } // namespace ngcore // Helper macro to easily add multiple timers in a function for profiling From 87623981a6c66df3d439ba951b64e9dc700361d6 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Thu, 19 Nov 2020 19:29:04 +0100 Subject: [PATCH 07/12] export PajeTrace.WriteMemoryChart() to python --- libsrc/core/paje_trace.cpp | 18 ++++++++++-------- libsrc/core/paje_trace.hpp | 3 ++- libsrc/core/python_ngcore_export.cpp | 7 ++++--- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index 856e2214..4e5331e6 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -742,7 +742,8 @@ namespace ngcore } } } - WriteSunburstHTML(); + WriteTimingChart(); + WriteMemoryChart(""); paje.WriteEvents(); } @@ -929,8 +930,10 @@ namespace ngcore } - void WriteMemorySunburstHTML( std::vector & events, std::string filename ) + void PajeTrace::WriteMemoryChart( std::string fname ) { + if(fname=="") + fname = tracefile_name + "_memory"; size_t mem_allocated = 0; size_t max_mem_allocated = 0; size_t imax_mem_allocated = 0; @@ -944,9 +947,9 @@ namespace ngcore mem_allocated_id = 0; // Find point with maximum memory allocation, check for missing allocs/frees - for(auto i : IntRange(events.size())) + for(auto i : IntRange(memory_events.size())) { - const auto & ev = events[i]; + const auto & ev = memory_events[i]; if(ev.is_alloc) { @@ -974,7 +977,7 @@ namespace ngcore mem_allocated_id = 0; for(auto i : IntRange(imax_mem_allocated+1)) { - const auto & ev = events[i]; + const auto & ev = memory_events[i]; if(ev.is_alloc) mem_allocated_id[ev.id] += ev.size; @@ -1047,11 +1050,11 @@ namespace ngcore nodes[parents[i]]->size += nodes[i]->size; } - WriteSunburstHTML( root, filename, false ); + WriteSunburstHTML( root, fname, false ); } - void PajeTrace::WriteSunburstHTML( ) + void PajeTrace::WriteTimingChart( ) { std::vector events; @@ -1140,7 +1143,6 @@ namespace ngcore root.chart_size = 0.0; ngcore::WriteSunburstHTML( root, tracefile_name, true ); - WriteMemorySunburstHTML( memory_events, tracefile_name+"_memory" ); } } // namespace ngcore diff --git a/libsrc/core/paje_trace.hpp b/libsrc/core/paje_trace.hpp index 22fd0f50..421f866e 100644 --- a/libsrc/core/paje_trace.hpp +++ b/libsrc/core/paje_trace.hpp @@ -30,7 +30,8 @@ namespace ngcore int nthreads; public: - void WriteSunburstHTML(); + NGCORE_API void WriteTimingChart(); + NGCORE_API void WriteMemoryChart( std::string fname ); // Approximate number of events to trace. Tracing will // be stopped if any thread reaches this number of events diff --git a/libsrc/core/python_ngcore_export.cpp b/libsrc/core/python_ngcore_export.cpp index 08d41ef4..59d1a13f 100644 --- a/libsrc/core/python_ngcore_export.cpp +++ b/libsrc/core/python_ngcore_export.cpp @@ -263,9 +263,10 @@ threads : int .def("__enter__", [](PajeTrace & self) { }) .def("__exit__", [](PajeTrace & self, py::args) { self.StopTracing(); }) .def("__del__", [](PajeTrace & self) { trace = nullptr; }) - .def("SetTraceThreads", &PajeTrace::SetTraceThreads) - .def("SetTraceThreadCounter", &PajeTrace::SetTraceThreadCounter) - .def("SetMaxTracefileSize", &PajeTrace::SetMaxTracefileSize) + .def_static("SetTraceThreads", &PajeTrace::SetTraceThreads) + .def_static("SetTraceThreadCounter", &PajeTrace::SetTraceThreadCounter) + .def_static("SetMaxTracefileSize", &PajeTrace::SetMaxTracefileSize) + .def_static("WriteMemoryChart", [](string filename){ if(trace) trace->WriteMemoryChart(filename); }, py::arg("filename")="memory" ) ; From a69cdc9000794a427a0176e1c63ba7cf423f043e Mon Sep 17 00:00:00 2001 From: Christopher Lackner Date: Sat, 21 Nov 2020 15:49:07 +0100 Subject: [PATCH 08/12] mem tracing compile time option, simplify by MemoryTracer as member --- CMakeLists.txt | 1 + cmake/SuperBuild.cmake | 1 + libsrc/core/CMakeLists.txt | 5 + libsrc/core/array.hpp | 30 +++-- libsrc/core/bitarray.cpp | 7 +- libsrc/core/bitarray.hpp | 9 ++ libsrc/core/paje_trace.cpp | 4 + libsrc/core/paje_trace.hpp | 2 + libsrc/core/profiler.cpp | 2 + libsrc/core/profiler.hpp | 163 +++++++++++++-------------- libsrc/core/python_ngcore_export.cpp | 2 + libsrc/core/table.hpp | 13 +++ libsrc/meshing/boundarylayer.cpp | 3 + 13 files changed, 142 insertions(+), 100 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7916e6ee..26dfc2e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ option( BUILD_STUB_FILES "Build stub files for better autocompletion" ON) option( BUILD_FOR_CONDA "Link python libraries only to executables" OFF) option( USE_SUPERBUILD "use ccache" ON) +option( TRACE_MEMORY "Enable memory tracing" OFF) set(NG_COMPILE_FLAGS "" CACHE STRING "Additional compile flags") diff --git a/cmake/SuperBuild.cmake b/cmake/SuperBuild.cmake index 37709768..6e9600df 100644 --- a/cmake/SuperBuild.cmake +++ b/cmake/SuperBuild.cmake @@ -143,6 +143,7 @@ set_vars( NETGEN_CMAKE_ARGS USE_SPDLOG DEBUG_LOG CHECK_RANGE + TRACE_MEMORY BUILD_STUB_FILES BUILD_FOR_CONDA NG_COMPILE_FLAGS diff --git a/libsrc/core/CMakeLists.txt b/libsrc/core/CMakeLists.txt index 8329a195..c3eba6a5 100644 --- a/libsrc/core/CMakeLists.txt +++ b/libsrc/core/CMakeLists.txt @@ -42,6 +42,11 @@ if(CHECK_RANGE OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL target_compile_definitions(ngcore PUBLIC NETGEN_ENABLE_CHECK_RANGE) endif(CHECK_RANGE OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") +if(TRACE_MEMORY OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") + target_compile_definitions(ngcore PUBLIC NETGEN_TRACE_MEMORY) +endif(TRACE_MEMORY OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") + + if(USE_SPDLOG) include_directories(${SPDLOG_INCLUDE_DIR}) install(DIRECTORY ${SPDLOG_INCLUDE_DIR} diff --git a/libsrc/core/array.hpp b/libsrc/core/array.hpp index 60132186..752f6f64 100644 --- a/libsrc/core/array.hpp +++ b/libsrc/core/array.hpp @@ -655,7 +655,6 @@ namespace ngcore /// that's the data we have to delete, nullptr for not owning the memory T * mem_to_delete; - int mem_tracing_id = 0; using FlatArray::size; using FlatArray::data; @@ -701,7 +700,7 @@ namespace ngcore NETGEN_INLINE Array (Array && a2) { - TraceMemorySwap(mem_tracing_id, sizeof(T)*allocsize, a2.mem_tracing_id, sizeof(T)*a2.allocsize); + mt.Swap(sizeof(T) * allocsize, a2.mt, sizeof(T) * a2.allocsize); size = a2.size; data = a2.data; @@ -774,7 +773,7 @@ namespace ngcore NETGEN_INLINE ~Array() { delete [] mem_to_delete; - TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); + mt.Free(sizeof(T)*allocsize); } // Only provide this function if T is archivable @@ -829,7 +828,7 @@ namespace ngcore NETGEN_INLINE const Array & Assign (size_t asize, LocalHeap & lh) { delete [] mem_to_delete; - TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); + mt.Free(sizeof(T)*allocsize); size = allocsize = asize; data = lh.Alloc (asize); mem_to_delete = nullptr; @@ -937,7 +936,7 @@ namespace ngcore NETGEN_INLINE void DeleteAll () { delete [] mem_to_delete; - TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); + mt.Free(sizeof(T)*allocsize); mem_to_delete = NULL; data = 0; size = allocsize = 0; @@ -969,7 +968,7 @@ namespace ngcore /// steal array NETGEN_INLINE Array & operator= (Array && a2) { - TraceMemorySwap(mem_tracing_id, sizeof(T)*allocsize, a2.mem_tracing_id, sizeof(T)*a2.allocsize); + mt.Swap(sizeof(T)*allocsize, a2.mt, sizeof(T)*a2.allocsize); ngcore::Swap (size, a2.size); ngcore::Swap (data, a2.data); @@ -1044,7 +1043,7 @@ namespace ngcore NETGEN_INLINE void Swap (Array & b) { - TraceMemorySwap(mem_tracing_id, sizeof(T)*allocsize, b.mem_tracing_id, sizeof(T)*b.allocsize); + mt.Swap(sizeof(T) * allocsize, b.mt, sizeof(T) * b.allocsize); ngcore::Swap (size, b.size); ngcore::Swap (data, b.data); @@ -1052,21 +1051,18 @@ namespace ngcore ngcore::Swap (mem_to_delete, b.mem_to_delete); } - NETGEN_INLINE void SetMemoryTracing (int mem_id) + NETGEN_INLINE void StartMemoryTracing () const { - if(!mem_tracing_id && mem_id) - TraceMemoryAlloc(mem_id, sizeof(T)*allocsize); - - if(mem_tracing_id && !mem_id) - TraceMemoryFree(mem_tracing_id, sizeof(T)*allocsize); - - mem_tracing_id = mem_id; + mt.Alloc(sizeof(T) * allocsize); } + const MemoryTracer& GetMemoryTracer() const { return mt; } + private: /// resize array, at least to size minsize. copy contents NETGEN_INLINE void ReSize (size_t minsize); + MemoryTracer mt; }; @@ -1079,7 +1075,7 @@ namespace ngcore T * hdata = data; data = new T[nsize]; - TraceMemoryAlloc(mem_tracing_id, sizeof(T)*nsize ); + mt.Alloc(sizeof(T) * nsize); if (hdata) { @@ -1093,7 +1089,7 @@ namespace ngcore for (size_t i = 0; i < mins; i++) data[i] = std::move(hdata[i]); #endif delete [] mem_to_delete; - TraceMemoryFree( mem_tracing_id, sizeof(T)*allocsize ); + mt.Free(sizeof(T) * allocsize); } mem_to_delete = data; diff --git a/libsrc/core/bitarray.cpp b/libsrc/core/bitarray.cpp index bc923ef1..6b1deac5 100644 --- a/libsrc/core/bitarray.cpp +++ b/libsrc/core/bitarray.cpp @@ -36,10 +36,15 @@ namespace ngcore void BitArray :: SetSize (size_t asize) { if (size == asize) return; - if (owns_data) delete [] data; + if (owns_data) + { + delete [] data; + mt.Free(Addr(size)+1); + } size = asize; data = new unsigned char [Addr (size)+1]; + mt.Alloc(Addr(size)+1); } BitArray & BitArray :: Set () throw() diff --git a/libsrc/core/bitarray.hpp b/libsrc/core/bitarray.hpp index 768b40df..cd8979de 100644 --- a/libsrc/core/bitarray.hpp +++ b/libsrc/core/bitarray.hpp @@ -150,6 +150,14 @@ public: NGCORE_API void DoArchive(Archive& archive); NGCORE_API auto * Data() const { return data; } + + const MemoryTracer& GetMemoryTracer() const { return mt; } + void StartMemoryTracing() const + { + if(owns_data) + mt.Alloc(Addr(size)+1); + } + private: /// unsigned char Mask (size_t i) const @@ -159,6 +167,7 @@ private: size_t Addr (size_t i) const { return (i / CHAR_BIT); } + MemoryTracer mt; }; diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index 4e5331e6..4e387ec2 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -743,7 +743,9 @@ namespace ngcore } } WriteTimingChart(); +#ifdef NETGEN_TRACE_MEMORY WriteMemoryChart(""); +#endif // NETGEN_TRACE_MEMORY paje.WriteEvents(); } @@ -930,6 +932,7 @@ namespace ngcore } +#ifdef NETGEN_TRACE_MEMORY void PajeTrace::WriteMemoryChart( std::string fname ) { if(fname=="") @@ -1053,6 +1056,7 @@ namespace ngcore WriteSunburstHTML( root, fname, false ); } +#endif // NETGEN_TRACE_MEMORY void PajeTrace::WriteTimingChart( ) { diff --git a/libsrc/core/paje_trace.hpp b/libsrc/core/paje_trace.hpp index 421f866e..c153c0cd 100644 --- a/libsrc/core/paje_trace.hpp +++ b/libsrc/core/paje_trace.hpp @@ -31,7 +31,9 @@ namespace ngcore public: NGCORE_API void WriteTimingChart(); +#ifdef NETGEN_TRACE_MEMORY NGCORE_API void WriteMemoryChart( std::string fname ); +#endif // NETGEN_TRACE_MEMORY // Approximate number of events to trace. Tracing will // be stopped if any thread reaches this number of events diff --git a/libsrc/core/profiler.cpp b/libsrc/core/profiler.cpp index 73aad63d..66365321 100644 --- a/libsrc/core/profiler.cpp +++ b/libsrc/core/profiler.cpp @@ -113,7 +113,9 @@ namespace ngcore NgProfiler prof; // NOLINT +#ifdef NETGEN_TRACE_MEMORY std::vector MemoryTracer::names{"root"}; std::map< int, std::vector > MemoryTracer::tree; +#endif // NETGEN_TRACE_MEMORY } // namespace ngcore diff --git a/libsrc/core/profiler.hpp b/libsrc/core/profiler.hpp index cb07d053..cdd945bd 100644 --- a/libsrc/core/profiler.hpp +++ b/libsrc/core/profiler.hpp @@ -304,45 +304,29 @@ namespace ngcore namespace detail { - //Type trait to check if a class implements a 'const MemoryTracer& GetMemoryTracer()' function - template - struct has_GetMemoryTracer - { - private: - template - static constexpr auto check(T2*) -> - typename std::is_same().GetMemoryTracer()),const MemoryTracer &>::type; - template - static constexpr std::false_type check(...); - using type = decltype(check(nullptr)); // NOLINT - public: - static constexpr bool value = type::value; - }; - //Type trait to check if a class implements a 'void SetMemoryTacing(int)' function template - struct has_SetMemoryTracing + struct has_StartMemoryTracing { private: template static constexpr auto check(T2*) -> - typename std::is_same().SetMemoryTracing(0)),void>::type; + typename std::is_same().StartMemoryTracing()),void>::type; template static constexpr std::false_type check(...); using type = decltype(check(nullptr)); // NOLINT public: static constexpr bool value = type::value; }; - - } // namespace detail class MemoryTracer { + #ifdef NETGEN_TRACE_MEMORY NGCORE_API static std::vector names; NGCORE_API static std::map< int, std::vector > tree; - static int GetId(std::string name) + static int CreateId(const std::string& name) { int id = names.size(); names.push_back(name); @@ -350,48 +334,73 @@ namespace ngcore std::cerr << "Allocated " << id << " MemoryTracer objects" << std::endl; return id; } - - int id; public: MemoryTracer( std::string name ) { - id = GetId(name); + id = CreateId(name); } + // not tracing + MemoryTracer() : id(0) {} + template MemoryTracer( std::string name, TRest & ... rest ) { - id = GetId(name); + id = CreateId(name); Track(rest...); } + NETGEN_INLINE void Alloc(size_t size) const + { + if(id && trace) + trace->AllocMemory(id, size); + } + + void Free(size_t size) const + { + if(id && trace) + trace->FreeMemory(id, size); + } + + void Swap(size_t mysize, MemoryTracer& other, size_t other_size) const + { + if(!trace || (id == 0 && other.id == 0)) + return; + if(id == 0) + return trace->ChangeMemory(other.id, mysize - other_size); + if(other.id == 0) + return trace->ChangeMemory(id, other_size - mysize); + + // first decrease memory, otherwise have artificial/wrong high peak memory usage + if(mysizeChangeMemory(other.id, mysize-other_size); + trace->ChangeMemory(id, other_size-mysize); + } + else + { + trace->ChangeMemory(id, other_size-mysize); + trace->ChangeMemory(other.id, mysize-other_size); + } + } + + int GetId() const { return id; } + template - void Track( T1 & obj, std::string name, TRest & ... rest ) const + void Track( T1 & obj, const std::string& name, TRest & ... rest ) const { Track(obj, name); Track(rest...); } template - void Track( T & obj, std::string name ) const + void Track( T & obj, const std::string& name ) const { - if constexpr(detail::has_SetMemoryTracing::value) - { - int child_id = GetId(name); - tree[id].push_back(child_id); - obj.SetMemoryTracing(child_id); - } - if constexpr(detail::has_GetMemoryTracer::value) - { - auto & mt = obj.GetMemoryTracer(); - int child_id = mt.id; - if(name!="") - names[mt.id] = name; - tree[id].push_back(child_id); - } + obj.GetMemoryTracer().Activate(obj, name); + tree[id].push_back(obj.GetMemoryTracer().GetId()); } static std::string GetName(int id) @@ -404,7 +413,20 @@ namespace ngcore return names[id]; } - void SetName(std::string name) const + template + void Activate(T& me, const std::string& name) const + { + if(!id) + { + const_cast(this)->id = CreateId(name); + if constexpr(detail::has_StartMemoryTracing::value) + me.StartMemoryTracing(); + } + else + SetName(name); + } + + void SetName(const std::string& name) const { names[id] = name; } @@ -412,49 +434,26 @@ namespace ngcore static const std::vector & GetNames() { return names; } static const std::map> & GetTree() { return tree; } +#else // NETGEN_TRACE_MEMORY + public: + MemoryTracer() {} + MemoryTracer( std::string name ) {} + template + MemoryTracer( std::string name, TRest & ... ) {} + void Alloc(size_t size) const {} + void Free(size_t size) const {} + void Swap(...) const {} + int GetId() const { return 0; } + + template + void Track(TRest&...) const {} + + static std::string GetName(int id) { return ""; } + std::string GetName() const { return ""; } + void SetName(std::string name) const {} +#endif // NETGEN_TRACE_MEMORY }; - - NETGEN_INLINE void TraceMemoryAlloc( int mem_id, size_t size ) - { - if(mem_id && trace) - trace->AllocMemory(mem_id, size); - } - - NETGEN_INLINE void TraceMemoryFree( int mem_id, size_t size ) - { - if(mem_id && trace) - trace->FreeMemory(mem_id, size); - } - - NETGEN_INLINE void TraceMemoryChange( int mem_id, long long size ) - { - if(mem_id && trace) - trace->ChangeMemory(mem_id, size); - } - - NETGEN_INLINE void TraceMemorySwap( int mem_id, size_t size, int mem_id2, size_t size2 ) - { - if(!trace || (mem_id==0 && mem_id2==0)) - return; - if(mem_id == 0) - return trace->ChangeMemory(mem_id2, size-size2); - if(mem_id2 == 0) - return trace->ChangeMemory(mem_id, size2-size); - - // first decrease memory, otherwise have artificial/wrong high peak memory usage - if(sizeChangeMemory(mem_id2, size-size2); - trace->ChangeMemory(mem_id, size2-size); - } - else - { - trace->ChangeMemory(mem_id, size2-size); - trace->ChangeMemory(mem_id2, size-size2); - } - } - } // namespace ngcore // Helper macro to easily add multiple timers in a function for profiling diff --git a/libsrc/core/python_ngcore_export.cpp b/libsrc/core/python_ngcore_export.cpp index 59d1a13f..abdceb0e 100644 --- a/libsrc/core/python_ngcore_export.cpp +++ b/libsrc/core/python_ngcore_export.cpp @@ -266,7 +266,9 @@ threads : int .def_static("SetTraceThreads", &PajeTrace::SetTraceThreads) .def_static("SetTraceThreadCounter", &PajeTrace::SetTraceThreadCounter) .def_static("SetMaxTracefileSize", &PajeTrace::SetMaxTracefileSize) +#ifdef NETGEN_TRACE_MEMORY .def_static("WriteMemoryChart", [](string filename){ if(trace) trace->WriteMemoryChart(filename); }, py::arg("filename")="memory" ) +#endif // NETGEN_TRACE_MEMORY ; diff --git a/libsrc/core/table.hpp b/libsrc/core/table.hpp index 8db44f9d..7471b6a3 100644 --- a/libsrc/core/table.hpp +++ b/libsrc/core/table.hpp @@ -159,6 +159,7 @@ namespace ngcore NETGEN_INLINE Table (Table && tab2) : FlatTable(0, nullptr, nullptr) { + tab2.mt.Free(tab2.GetMemUsage()); Swap (size, tab2.size); Swap (index, tab2.index); Swap (data, tab2.data); @@ -166,6 +167,7 @@ namespace ngcore NETGEN_INLINE Table & operator= (Table && tab2) { + mt.Swap(GetMemUsage(), tab2.mt, tab2.GetMemUsage()); Swap (size, tab2.size); Swap (index, tab2.index); Swap (data, tab2.data); @@ -177,6 +179,7 @@ namespace ngcore /// Delete data NETGEN_INLINE ~Table () { + mt.Free(GetMemUsage()); delete [] data; delete [] index; } @@ -188,6 +191,16 @@ namespace ngcore NETGEN_INLINE size_t NElements() const { return index[size]; } using FlatTable::operator[]; + + NETGEN_INLINE void StartMemoryTracing (int mem_id) + { + mt.Alloc(GetMemUsage()); + } + const MemoryTracer& GetMemoryTracer() const { return mt; } + + private: + size_t GetMemUsage() const { return size == 0 ? 0 : sizeof(T)*index[size] + sizeof(IndexType) * size+1; } + MemoryTracer mt; }; diff --git a/libsrc/meshing/boundarylayer.cpp b/libsrc/meshing/boundarylayer.cpp index bd5427ef..89baa018 100644 --- a/libsrc/meshing/boundarylayer.cpp +++ b/libsrc/meshing/boundarylayer.cpp @@ -90,6 +90,9 @@ namespace netgen void GenerateBoundaryLayer(Mesh& mesh, const BoundaryLayerParameters& blp) { + static Timer timer("Create Boundarylayers"); + RegionTimer regt(timer); + int max_edge_nr = -1; for(const auto& seg : mesh.LineSegments()) if(seg.edgenr > max_edge_nr) From 922ad16213970138b05ef21ab36eaee3786f92f9 Mon Sep 17 00:00:00 2001 From: Christopher Lackner Date: Sat, 21 Nov 2020 22:32:41 +0100 Subject: [PATCH 09/12] if more memory is deallocated than allocated set memtracer to 0 not negative values --- libsrc/core/paje_trace.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index 4e387ec2..9b4ac053 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -967,12 +967,19 @@ namespace ngcore else { if(ev.size > mem_allocated) - std::cerr << "Error in memory tracer: have total allocated memory < 0" << std::endl; + { + std::cerr << "Error in memory tracer: have total allocated memory < 0" << std::endl; + mem_allocated = 0; + } + else + mem_allocated -= ev.size; if(ev.size > mem_allocated_id[ev.id]) - std::cerr << "Error in memory tracer: have allocated memory < 0 in tracer " << names[ev.id] << std::endl; - - mem_allocated -= ev.size; - mem_allocated_id[ev.id] -= ev.size; + { + std::cerr << "Error in memory tracer: have allocated memory < 0 in tracer " << names[ev.id] << std::endl; + mem_allocated_id[ev.id] = 0; + } + else + mem_allocated_id[ev.id] -= ev.size; } } @@ -985,7 +992,12 @@ namespace ngcore if(ev.is_alloc) mem_allocated_id[ev.id] += ev.size; else - mem_allocated_id[ev.id] -= ev.size; + { + if(ev.size > mem_allocated_id[ev.id]) + mem_allocated_id[ev.id] = 0; + else + mem_allocated_id[ev.id] -= ev.size; + } } TreeNode root; From efdc57885af52d12f6f33844e9c06f6ead0a282d Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Tue, 24 Nov 2020 15:47:25 +0100 Subject: [PATCH 10/12] memory tracing - store parents array instead of children table --- libsrc/core/paje_trace.cpp | 32 ++++++++++---------------------- libsrc/core/profiler.cpp | 4 ++-- libsrc/core/profiler.hpp | 7 ++++--- 3 files changed, 16 insertions(+), 27 deletions(-) diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index 9b4ac053..1a1c9303 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -942,7 +942,7 @@ namespace ngcore size_t imax_mem_allocated = 0; const auto & names = MemoryTracer::GetNames(); - const auto & tree = MemoryTracer::GetTree(); + const auto & parents = MemoryTracer::GetParents(); size_t N = names.size(); Array mem_allocated_id; @@ -1006,39 +1006,27 @@ namespace ngcore Array nodes; nodes.SetSize(N); nodes = nullptr; + Array> children(N); + Array sorting; // topological sorting (parents before children) sorting.SetAllocSize(N); - ArrayMem stack; - - // find root nodes in memory tracer tree, i.e. they have no parents - Array parents; - parents.SetSize(N); - parents = -1; - for( const auto & [iparent, children] : tree ) - for (auto child_id : children) - { - if(parents[child_id] != -1) - std::cerr << "Error in memory tracer: multiple parents found for " << names[child_id] << std::endl; - parents[child_id] = iparent; - } for(auto i : IntRange(1, N)) - if(parents[i]==-1) - { - sorting.Append(i); - if(tree.count(i)) - stack.Append(i); - } + children[parents[i]].Append(i); + + ArrayMem stack; + sorting.Append(0); + stack.Append(0); while(stack.Size()) { auto current = stack.Last(); stack.DeleteLast(); - for(const auto child : tree.at(current)) + for(const auto child : children[current]) { sorting.Append(child); - if(tree.count(child)) + if(children[child].Size()) stack.Append(child); } } diff --git a/libsrc/core/profiler.cpp b/libsrc/core/profiler.cpp index 66365321..33ef98f4 100644 --- a/libsrc/core/profiler.cpp +++ b/libsrc/core/profiler.cpp @@ -114,8 +114,8 @@ namespace ngcore NgProfiler prof; // NOLINT #ifdef NETGEN_TRACE_MEMORY - std::vector MemoryTracer::names{"root"}; - std::map< int, std::vector > MemoryTracer::tree; + std::vector MemoryTracer::names{"all"}; + std::vector MemoryTracer::parents{-1}; #endif // NETGEN_TRACE_MEMORY } // namespace ngcore diff --git a/libsrc/core/profiler.hpp b/libsrc/core/profiler.hpp index cdd945bd..208b7a4e 100644 --- a/libsrc/core/profiler.hpp +++ b/libsrc/core/profiler.hpp @@ -324,12 +324,13 @@ namespace ngcore { #ifdef NETGEN_TRACE_MEMORY NGCORE_API static std::vector names; - NGCORE_API static std::map< int, std::vector > tree; + NGCORE_API static std::vector parents; static int CreateId(const std::string& name) { int id = names.size(); names.push_back(name); + parents.push_back(0); if(id==10*NgProfiler::SIZE) std::cerr << "Allocated " << id << " MemoryTracer objects" << std::endl; return id; @@ -400,7 +401,7 @@ namespace ngcore void Track( T & obj, const std::string& name ) const { obj.GetMemoryTracer().Activate(obj, name); - tree[id].push_back(obj.GetMemoryTracer().GetId()); + parents[obj.GetMemoryTracer().GetId()] = id; } static std::string GetName(int id) @@ -433,7 +434,7 @@ namespace ngcore static const std::vector & GetNames() { return names; } - static const std::map> & GetTree() { return tree; } + static const std::vector & GetParents() { return parents; } #else // NETGEN_TRACE_MEMORY public: MemoryTracer() {} From b55264e0ee6cf0f29bbddebd27a9ea83ff9bfeac Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Tue, 24 Nov 2020 19:20:21 +0100 Subject: [PATCH 11/12] memory tracing - handle multiple consecutive tracers correctly --- libsrc/core/paje_trace.cpp | 33 +++++++++++++++++++++++++++------ libsrc/core/paje_trace.hpp | 3 ++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index 1a1c9303..bfe3fad0 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -29,6 +29,8 @@ namespace ngcore #endif // PARALLEL } + std::vector PajeTrace::memory_events; + // Produce no traces by default size_t PajeTrace::max_tracefile_size = 0; @@ -75,6 +77,7 @@ namespace ngcore start_time = GetTimeCounter(); tracing_enabled = true; mem_tracing_enabled = true; + n_memory_events_at_start = memory_events.size(); } PajeTrace :: ~PajeTrace() @@ -97,8 +100,8 @@ namespace ngcore for(auto & link : llink) link.time -= start_time; - for(auto & m : memory_events) - m.time -= start_time; + for(auto i : IntRange(n_memory_events_at_start, memory_events.size())) + memory_events[i].time -= start_time; NgMPI_Comm comm(MPI_COMM_WORLD); @@ -451,7 +454,6 @@ namespace ngcore if(mem_tracing_enabled) { variable_type_memory = paje.DefineVariableType( container_type_task_manager, "Memory [MB]" ); - paje.SetVariable( 0, variable_type_memory, container_memory, 0.0 ); } @@ -524,8 +526,23 @@ namespace ngcore paje.PopState( j.stop_time, state_type_job, container_jobs ); } - for(const auto & m : memory_events) + size_t memory_at_start = 0; + + for(const auto & i : IntRange(0, n_memory_events_at_start)) { + if(memory_events[i].is_alloc) + memory_at_start += memory_events[i].size; + else + memory_at_start -= memory_events[i].size; + } + + paje.SetVariable( 0, variable_type_memory, container_memory, 1.0*memory_at_start/(1024*1024)); + + for(const auto & i : IntRange(n_memory_events_at_start, memory_events.size())) + { + auto & m = memory_events[i]; + if(m.size==0) + continue; double size = 1.0*m.size/(1024*1024); if(m.is_alloc) paje.AddVariable( m.time, variable_type_memory, container_memory, size); @@ -958,7 +975,7 @@ namespace ngcore { mem_allocated += ev.size; mem_allocated_id[ev.id] += ev.size; - if(mem_allocated > max_mem_allocated) + if(mem_allocated > max_mem_allocated && i>=n_memory_events_at_start) { imax_mem_allocated = i; max_mem_allocated = mem_allocated; @@ -1006,6 +1023,7 @@ namespace ngcore Array nodes; nodes.SetSize(N); nodes = nullptr; + nodes[0] = &root; Array> children(N); Array sorting; // topological sorting (parents before children) @@ -1033,7 +1051,10 @@ namespace ngcore for(auto i : sorting) { - TreeNode * parent = (parents[i]==-1) ? &root : nodes[parents[i]]; + if(i==0) + continue; + + TreeNode * parent = nodes[parents[i]]; auto & node = parent->children[i]; nodes[i] = &node; diff --git a/libsrc/core/paje_trace.hpp b/libsrc/core/paje_trace.hpp index c153c0cd..5444a96c 100644 --- a/libsrc/core/paje_trace.hpp +++ b/libsrc/core/paje_trace.hpp @@ -28,6 +28,7 @@ namespace ngcore bool tracing_enabled; TTimePoint start_time; int nthreads; + size_t n_memory_events_at_start; public: NGCORE_API void WriteTimingChart(); @@ -119,7 +120,7 @@ namespace ngcore std::vector jobs; std::vector timer_events; std::vector > links; - std::vector memory_events; + NGCORE_API static std::vector memory_events; public: NGCORE_API void StopTracing(); From 91f127ef712af95377bf1a34ac3cacc639229e08 Mon Sep 17 00:00:00 2001 From: Matthias Hochsteger Date: Wed, 25 Nov 2020 14:34:29 +0100 Subject: [PATCH 12/12] memory tracer - fix memory accumulation of children --- libsrc/core/paje_trace.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index bfe3fad0..56fb0c12 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -1068,10 +1068,9 @@ namespace ngcore { // reverse topological order to accumulate total memory usage of all children auto i = sorting[sorting.Size()-1-i_]; - if(parents[i]==-1) - root.size += nodes[i]->size; - else - nodes[parents[i]]->size += nodes[i]->size; + if(i==0) + continue; + nodes[parents[i]]->size += nodes[i]->size; } WriteSunburstHTML( root, fname, false );