diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e07e4ec..2cb66344 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ option( BUILD_STUB_FILES "Build stub files for better autocompletion" ON) option( BUILD_FOR_CONDA "Link python libraries only to executables" OFF) option( USE_SUPERBUILD "use ccache" ON) +option( TRACE_MEMORY "Enable memory tracing" OFF) set(NG_COMPILE_FLAGS "" CACHE STRING "Additional compile flags") diff --git a/cmake/SuperBuild.cmake b/cmake/SuperBuild.cmake index 37709768..6e9600df 100644 --- a/cmake/SuperBuild.cmake +++ b/cmake/SuperBuild.cmake @@ -143,6 +143,7 @@ set_vars( NETGEN_CMAKE_ARGS USE_SPDLOG DEBUG_LOG CHECK_RANGE + TRACE_MEMORY BUILD_STUB_FILES BUILD_FOR_CONDA NG_COMPILE_FLAGS diff --git a/libsrc/core/CMakeLists.txt b/libsrc/core/CMakeLists.txt index 8329a195..c3eba6a5 100644 --- a/libsrc/core/CMakeLists.txt +++ b/libsrc/core/CMakeLists.txt @@ -42,6 +42,11 @@ if(CHECK_RANGE OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL target_compile_definitions(ngcore PUBLIC NETGEN_ENABLE_CHECK_RANGE) endif(CHECK_RANGE OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") +if(TRACE_MEMORY OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") + target_compile_definitions(ngcore PUBLIC NETGEN_TRACE_MEMORY) +endif(TRACE_MEMORY OR CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") + + if(USE_SPDLOG) include_directories(${SPDLOG_INCLUDE_DIR}) install(DIRECTORY ${SPDLOG_INCLUDE_DIR} diff --git a/libsrc/core/array.hpp b/libsrc/core/array.hpp index 864fd454..752f6f64 100644 --- a/libsrc/core/array.hpp +++ b/libsrc/core/array.hpp @@ -11,6 +11,7 @@ #include "archive.hpp" #include "exception.hpp" #include "localheap.hpp" +#include "profiler.hpp" #include "utils.hpp" namespace ngcore @@ -654,6 +655,7 @@ namespace ngcore /// that's the data we have to delete, nullptr for not owning the memory T * mem_to_delete; + using FlatArray::size; using FlatArray::data; 
using FlatArray::BASE; @@ -698,6 +700,8 @@ namespace ngcore NETGEN_INLINE Array (Array && a2) { + /* *this is not initialized yet and therefore holds 0 bytes; reading allocsize before it is assigned below would use an indeterminate value */ mt.Swap(0, a2.mt, sizeof(T) * a2.allocsize); + size = a2.size; data = a2.data; allocsize = a2.allocsize; @@ -769,6 +773,7 @@ namespace ngcore NETGEN_INLINE ~Array() { delete [] mem_to_delete; + mt.Free(sizeof(T)*allocsize); } // Only provide this function if T is archivable @@ -823,6 +828,7 @@ namespace ngcore NETGEN_INLINE const Array & Assign (size_t asize, LocalHeap & lh) { delete [] mem_to_delete; + mt.Free(sizeof(T)*allocsize); size = allocsize = asize; data = lh.Alloc (asize); mem_to_delete = nullptr; @@ -930,6 +936,7 @@ namespace ngcore NETGEN_INLINE void DeleteAll () { delete [] mem_to_delete; + mt.Free(sizeof(T)*allocsize); mem_to_delete = NULL; data = 0; size = allocsize = 0; @@ -961,6 +968,8 @@ namespace ngcore /// steal array NETGEN_INLINE Array & operator= (Array && a2) { + mt.Swap(sizeof(T)*allocsize, a2.mt, sizeof(T)*a2.allocsize); + ngcore::Swap (size, a2.size); ngcore::Swap (data, a2.data); ngcore::Swap (allocsize, a2.allocsize); @@ -1034,16 +1043,26 @@ namespace ngcore NETGEN_INLINE void Swap (Array & b) { + mt.Swap(sizeof(T) * allocsize, b.mt, sizeof(T) * b.allocsize); + ngcore::Swap (size, b.size); ngcore::Swap (data, b.data); ngcore::Swap (allocsize, b.allocsize); ngcore::Swap (mem_to_delete, b.mem_to_delete); } + NETGEN_INLINE void StartMemoryTracing () const + { + mt.Alloc(sizeof(T) * allocsize); + } + + const MemoryTracer& GetMemoryTracer() const { return mt; } + private: /// resize array, at least to size minsize. 
copy contents NETGEN_INLINE void ReSize (size_t minsize); + MemoryTracer mt; }; @@ -1056,6 +1075,7 @@ namespace ngcore T * hdata = data; data = new T[nsize]; + mt.Alloc(sizeof(T) * nsize); if (hdata) { @@ -1069,6 +1089,7 @@ namespace ngcore for (size_t i = 0; i < mins; i++) data[i] = std::move(hdata[i]); #endif delete [] mem_to_delete; + mt.Free(sizeof(T) * allocsize); } mem_to_delete = data; diff --git a/libsrc/core/bitarray.cpp b/libsrc/core/bitarray.cpp index bc923ef1..6b1deac5 100644 --- a/libsrc/core/bitarray.cpp +++ b/libsrc/core/bitarray.cpp @@ -36,10 +36,15 @@ namespace ngcore void BitArray :: SetSize (size_t asize) { if (size == asize) return; - if (owns_data) delete [] data; + if (owns_data) + { + delete [] data; + mt.Free(Addr(size)+1); + } size = asize; data = new unsigned char [Addr (size)+1]; + mt.Alloc(Addr(size)+1); } BitArray & BitArray :: Set () throw() diff --git a/libsrc/core/bitarray.hpp b/libsrc/core/bitarray.hpp index 768b40df..cd8979de 100644 --- a/libsrc/core/bitarray.hpp +++ b/libsrc/core/bitarray.hpp @@ -150,6 +150,14 @@ public: NGCORE_API void DoArchive(Archive& archive); NGCORE_API auto * Data() const { return data; } + + const MemoryTracer& GetMemoryTracer() const { return mt; } + void StartMemoryTracing() const + { + if(owns_data) + mt.Alloc(Addr(size)+1); + } + private: /// unsigned char Mask (size_t i) const @@ -159,6 +167,7 @@ private: size_t Addr (size_t i) const { return (i / CHAR_BIT); } + MemoryTracer mt; }; diff --git a/libsrc/core/paje_trace.cpp b/libsrc/core/paje_trace.cpp index c58912a4..56fb0c12 100644 --- a/libsrc/core/paje_trace.cpp +++ b/libsrc/core/paje_trace.cpp @@ -29,6 +29,8 @@ namespace ngcore #endif // PARALLEL } + std::vector PajeTrace::memory_events; + // Produce no traces by default size_t PajeTrace::max_tracefile_size = 0; @@ -36,6 +38,7 @@ namespace ngcore // increases trace by a factor of two bool PajeTrace::trace_thread_counter = false; bool PajeTrace::trace_threads = true; + bool 
PajeTrace::mem_tracing_enabled = true; PajeTrace :: PajeTrace(int anthreads, std::string aname) { @@ -62,6 +65,7 @@ namespace ngcore jobs.reserve(reserve_size); timer_events.reserve(reserve_size); + memory_events.reserve(1024*1024); // sync start time when running in parallel #ifdef PARALLEL @@ -72,6 +76,8 @@ namespace ngcore start_time = GetTimeCounter(); tracing_enabled = true; + mem_tracing_enabled = true; + n_memory_events_at_start = memory_events.size(); } PajeTrace :: ~PajeTrace() @@ -94,6 +100,9 @@ namespace ngcore for(auto & link : llink) link.time -= start_time; + for(auto i : IntRange(n_memory_events_at_start, memory_events.size())) + memory_events[i].time -= start_time; + NgMPI_Comm comm(MPI_COMM_WORLD); if(comm.Size()==1) @@ -237,7 +246,8 @@ namespace ngcore PajeFile( const std::string & filename) { - ctrace_stream = fopen (filename.c_str(),"w"); // NOLINT + std::string fname = filename + ".trace"; + ctrace_stream = fopen (fname.c_str(),"w"); // NOLINT fprintf(ctrace_stream, "%s", header ); // NOLINT alias_counter = 0; } @@ -426,6 +436,7 @@ namespace ngcore const int container_type_thread = paje.DefineContainerType( container_type_task_manager, "Thread"); const int container_type_timer = container_type_thread; //paje.DefineContainerType( container_type_task_manager, "Timers"); const int container_type_jobs = paje.DefineContainerType( container_type_task_manager, "Jobs"); + const int container_type_memory = paje.DefineContainerType( container_type_task_manager, "Memory usage"); const int state_type_job = paje.DefineStateType( container_type_jobs, "Job" ); const int state_type_task = paje.DefineStateType( container_type_thread, "Task" ); @@ -433,12 +444,18 @@ namespace ngcore int variable_type_active_threads = 0; if(trace_thread_counter) - paje.DefineVariableType( container_type_jobs, "Active threads" ); + variable_type_active_threads = paje.DefineVariableType( container_type_jobs, "Active threads" ); const int container_task_manager = 
paje.CreateContainer( container_type_task_manager, 0, "The task manager" ); const int container_jobs = paje.CreateContainer( container_type_jobs, container_task_manager, "Jobs" ); - if(trace_thread_counter) - paje.SetVariable( 0, variable_type_active_threads, container_jobs, 0.0 ); + + int variable_type_memory = 0; + const int container_memory = paje.CreateContainer( container_type_memory, container_task_manager, "Memory" ); + if(mem_tracing_enabled) + { + variable_type_memory = paje.DefineVariableType( container_type_task_manager, "Memory [MB]" ); + } + int num_nodes = 1; //task_manager ? task_manager->GetNumNodes() : 1; std::vector thread_aliases; @@ -509,6 +526,30 @@ namespace ngcore paje.PopState( j.stop_time, state_type_job, container_jobs ); } + size_t memory_at_start = 0; + + for(const auto & i : IntRange(0, n_memory_events_at_start)) + { + if(memory_events[i].is_alloc) + memory_at_start += memory_events[i].size; + else + memory_at_start -= memory_events[i].size; + } + + paje.SetVariable( 0, variable_type_memory, container_memory, 1.0*memory_at_start/(1024*1024)); + + for(const auto & i : IntRange(n_memory_events_at_start, memory_events.size())) + { + auto & m = memory_events[i]; + if(m.size==0) + continue; + double size = 1.0*m.size/(1024*1024); + if(m.is_alloc) + paje.AddVariable( m.time, variable_type_memory, container_memory, size); + else + paje.SubVariable( m.time, variable_type_memory, container_memory, size); + } + std::set timer_ids; std::map timer_aliases; std::map timer_names; @@ -718,7 +759,10 @@ namespace ngcore } } } - WriteSunburstHTML(); + WriteTimingChart(); +#ifdef NETGEN_TRACE_MEMORY + WriteMemoryChart(""); +#endif // NETGEN_TRACE_MEMORY paje.WriteEvents(); } @@ -786,24 +830,25 @@ namespace ngcore int id = 0; std::map children; double chart_size = 0.0; // time without children (the chart lib accumulates children sizes again) - double time = 0.0; - double min_time = 1e99; - double max_time = 0.0; - size_t calls = 0; + double size = 0.0; + 
double min_size = 1e99; + double max_size = 0.0; std::string name; + + size_t calls = 0; TTimePoint start_time = 0; }; - void PrintNode (const TreeNode &n, int &level, std::ofstream & f); - void PrintNode (const TreeNode &n, int &level, std::ofstream & f) + void PrintNode (const TreeNode &n, std::ofstream & f) { f << "{ name: \"" + n.name + "\""; f << ", calls: " << n.calls; f << ", size: " << n.chart_size; - f << ", time: " << n.time; - f << ", min: " << n.min_time; - f << ", max: " << n.max_time; - f << ", avg: " << n.time/n.calls; + f << ", value: " << n.size; + f << ", min: " << n.min_size; + f << ", max: " << n.max_size; + if(n.calls) + f << ", avg: " << n.size/n.calls; int size = n.children.size(); if(size>0) { @@ -811,7 +856,7 @@ namespace ngcore f << ", children: ["; for(auto & c : n.children) { - PrintNode(c.second, level, f); + PrintNode(c.second, f); if(++i + + + + +)CODE_"; + if(!time_or_memory) + f << "Maximum Memory Consumption\n"; + f << R"CODE_( + + +
+ + + +)CODE_" << std::endl; + + + } + +#ifdef NETGEN_TRACE_MEMORY + void PajeTrace::WriteMemoryChart( std::string fname ) + { + if(fname=="") + fname = tracefile_name + "_memory"; + size_t mem_allocated = 0; + size_t max_mem_allocated = 0; + size_t imax_mem_allocated = 0; + + const auto & names = MemoryTracer::GetNames(); + const auto & parents = MemoryTracer::GetParents(); + size_t N = names.size(); + + Array mem_allocated_id; + mem_allocated_id.SetSize(N); + mem_allocated_id = 0; + + // Find point with maximum memory allocation, check for missing allocs/frees + for(auto i : IntRange(memory_events.size())) + { + const auto & ev = memory_events[i]; + + if(ev.is_alloc) + { + mem_allocated += ev.size; + mem_allocated_id[ev.id] += ev.size; + if(mem_allocated > max_mem_allocated && i>=n_memory_events_at_start) + { + imax_mem_allocated = i; + max_mem_allocated = mem_allocated; + } + } + else + { + if(ev.size > mem_allocated) + { + std::cerr << "Error in memory tracer: have total allocated memory < 0" << std::endl; + mem_allocated = 0; + } + else + mem_allocated -= ev.size; + if(ev.size > mem_allocated_id[ev.id]) + { + std::cerr << "Error in memory tracer: have allocated memory < 0 in tracer " << names[ev.id] << std::endl; + mem_allocated_id[ev.id] = 0; + } + else + mem_allocated_id[ev.id] -= ev.size; + } + } + + // reconstruct again the memory consumption after event imax_mem_allocated + mem_allocated_id = 0; + for(auto i : IntRange(imax_mem_allocated+1)) + { + const auto & ev = memory_events[i]; + + if(ev.is_alloc) + mem_allocated_id[ev.id] += ev.size; + else + { + if(ev.size > mem_allocated_id[ev.id]) + mem_allocated_id[ev.id] = 0; + else + mem_allocated_id[ev.id] -= ev.size; + } + } + + TreeNode root; + root.name="all"; + + Array nodes; + nodes.SetSize(N); + nodes = nullptr; + nodes[0] = &root; + Array> children(N); + + Array sorting; // topological sorting (parents before children) + sorting.SetAllocSize(N); + + for(auto i : IntRange(1, N)) + 
children[parents[i]].Append(i); + + ArrayMem stack; + sorting.Append(0); + stack.Append(0); + + while(stack.Size()) + { + auto current = stack.Last(); + stack.DeleteLast(); + + for(const auto child : children[current]) + { + sorting.Append(child); + if(children[child].Size()) + stack.Append(child); + } + } + + for(auto i : sorting) + { + if(i==0) + continue; + + TreeNode * parent = nodes[parents[i]]; + + auto & node = parent->children[i]; + nodes[i] = &node; + node.id = i; + node.chart_size = mem_allocated_id[i]; + node.size = mem_allocated_id[i]; + node.name = names[i]; + } + + for(auto i_ : Range(sorting)) + { + // reverse topological order to accumulate total memory usage of all children + auto i = sorting[sorting.Size()-1-i_]; + if(i==0) + continue; + nodes[parents[i]]->size += nodes[i]->size; + } + + WriteSunburstHTML( root, fname, false ); + + } +#endif // NETGEN_TRACE_MEMORY + + void PajeTrace::WriteTimingChart( ) { std::vector events; @@ -861,10 +1119,10 @@ namespace ngcore std::sort (events.begin(), events.end()); - root.time = 1000.0*static_cast(stop_time) * seconds_per_tick; + root.size = 1000.0*static_cast(stop_time) * seconds_per_tick; root.calls = 1; - root.min_time = root.time; - root.max_time = root.time; + root.min_size = root.size; + root.max_size = root.size; for(auto & event : events) { @@ -881,7 +1139,7 @@ namespace ngcore if(need_init) { current->name = is_timer_event ? GetTimerName(id) : job_names[id]; - current->time = 0.0; + current->size = 0.0; current->id = id; } @@ -893,73 +1151,22 @@ namespace ngcore std::cout << "node stack empty!" 
<< std::endl; break; } - double time = 1000.0*static_cast(event.time-current->start_time) * seconds_per_tick; - current->time += time; - current->chart_size += time; - current->min_time = std::min(current->min_time, time); - current->max_time = std::max(current->max_time, time); + double size = 1000.0*static_cast(event.time-current->start_time) * seconds_per_tick; + current->size += size; + current->chart_size += size; + current->min_size = std::min(current->min_size, size); + current->max_size = std::max(current->max_size, size); current->calls++; current = node_stack.back(); - current->chart_size -= time; + current->chart_size -= size; node_stack.pop_back(); } } root.chart_size = 0.0; - int level = 0; - std::ofstream f(tracefile_name+".html"); - f.precision(4); - f << R"CODE_( - - - - - - - -
- - - -)CODE_" << std::endl; + ngcore::WriteSunburstHTML( root, tracefile_name, true ); } } // namespace ngcore diff --git a/libsrc/core/paje_trace.hpp b/libsrc/core/paje_trace.hpp index 95c42d4a..5444a96c 100644 --- a/libsrc/core/paje_trace.hpp +++ b/libsrc/core/paje_trace.hpp @@ -23,18 +23,28 @@ namespace ngcore NGCORE_API static size_t max_tracefile_size; NGCORE_API static bool trace_thread_counter; NGCORE_API static bool trace_threads; + NGCORE_API static bool mem_tracing_enabled; bool tracing_enabled; TTimePoint start_time; int nthreads; + size_t n_memory_events_at_start; public: - void WriteSunburstHTML(); + NGCORE_API void WriteTimingChart(); +#ifdef NETGEN_TRACE_MEMORY + NGCORE_API void WriteMemoryChart( std::string fname ); +#endif // NETGEN_TRACE_MEMORY // Approximate number of events to trace. Tracing will // be stopped if any thread reaches this number of events unsigned int max_num_events_per_thread; + static void SetTraceMemory( bool trace_memory ) + { + mem_tracing_enabled = trace_memory; + } + static void SetTraceThreads( bool atrace_threads ) { trace_threads = atrace_threads; @@ -96,10 +106,21 @@ namespace ngcore bool operator < (const ThreadLink & other) const { return time < other.time; } }; + struct MemoryEvent + { + TTimePoint time; + size_t size; + int id; + bool is_alloc; + + bool operator < (const MemoryEvent & other) const { return time < other.time; } + }; + std::vector > tasks; std::vector jobs; std::vector timer_events; std::vector > links; + NGCORE_API static std::vector memory_events; public: NGCORE_API void StopTracing(); @@ -129,6 +150,27 @@ namespace ngcore timer_events.push_back(TimerEvent{timer_id, GetTimeCounter(), false}); } + void AllocMemory(int id, size_t size) + { + if(!mem_tracing_enabled) return; + memory_events.push_back(MemoryEvent{GetTimeCounter(), size, id, true}); + } + + void FreeMemory(int id, size_t size) + { + if(!mem_tracing_enabled) return; + memory_events.push_back(MemoryEvent{GetTimeCounter(), size, id, 
false}); + } + + void ChangeMemory(int id, long long size) + { + if(size>0) + AllocMemory(id, size); + if(size<0) + FreeMemory(id, -size); + } + + NETGEN_INLINE int StartTask(int thread_id, int id, int id_type = Task::ID_NONE, int additional_value = -1) { if(!tracing_enabled) return -1; diff --git a/libsrc/core/profiler.cpp b/libsrc/core/profiler.cpp index 1190e6fe..33ef98f4 100644 --- a/libsrc/core/profiler.cpp +++ b/libsrc/core/profiler.cpp @@ -113,5 +113,9 @@ namespace ngcore NgProfiler prof; // NOLINT +#ifdef NETGEN_TRACE_MEMORY + std::vector MemoryTracer::names{"all"}; + std::vector MemoryTracer::parents{-1}; +#endif // NETGEN_TRACE_MEMORY } // namespace ngcore diff --git a/libsrc/core/profiler.hpp b/libsrc/core/profiler.hpp index c16c242c..208b7a4e 100644 --- a/libsrc/core/profiler.hpp +++ b/libsrc/core/profiler.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "logging.hpp" @@ -299,6 +300,161 @@ namespace ngcore return tres; } + class MemoryTracer; + + namespace detail + { + //Type trait to check if a class implements a 'void StartMemoryTracing()' function (callable without arguments) + template + struct has_StartMemoryTracing + { + private: + template + static constexpr auto check(T2*) -> + typename std::is_same().StartMemoryTracing()),void>::type; + template + static constexpr std::false_type check(...); + using type = decltype(check(nullptr)); // NOLINT + public: + static constexpr bool value = type::value; + }; + } // namespace detail + + class MemoryTracer + { + #ifdef NETGEN_TRACE_MEMORY + NGCORE_API static std::vector names; + NGCORE_API static std::vector parents; + + static int CreateId(const std::string& name) + { + int id = names.size(); + names.push_back(name); + parents.push_back(0); + if(id==10*NgProfiler::SIZE) + std::cerr << "Allocated " << id << " MemoryTracer objects" << std::endl; + return id; + } + int id; + + public: + + MemoryTracer( std::string name ) + { + id = CreateId(name); + } + + // not tracing + MemoryTracer() : id(0) {} + + template + 
MemoryTracer( std::string name, TRest & ... rest ) + { + id = CreateId(name); + Track(rest...); + } + + NETGEN_INLINE void Alloc(size_t size) const + { + if(id && trace) + trace->AllocMemory(id, size); + } + + void Free(size_t size) const + { + if(id && trace) + trace->FreeMemory(id, size); + } + + void Swap(size_t mysize, MemoryTracer& other, size_t other_size) const + { + if(!trace || (id == 0 && other.id == 0)) + return; + if(id == 0) + return trace->ChangeMemory(other.id, mysize - other_size); + if(other.id == 0) + return trace->ChangeMemory(id, other_size - mysize); + + // first decrease memory, otherwise have artificial/wrong high peak memory usage + if(mysizeChangeMemory(other.id, mysize-other_size); + trace->ChangeMemory(id, other_size-mysize); + } + else + { + trace->ChangeMemory(id, other_size-mysize); + trace->ChangeMemory(other.id, mysize-other_size); + } + } + + int GetId() const { return id; } + + template + void Track( T1 & obj, const std::string& name, TRest & ... rest ) const + { + Track(obj, name); + Track(rest...); + } + + template + void Track( T & obj, const std::string& name ) const + { + obj.GetMemoryTracer().Activate(obj, name); + parents[obj.GetMemoryTracer().GetId()] = id; + } + + static std::string GetName(int id) + { + return names[id]; + } + + std::string GetName() const + { + return names[id]; + } + + template + void Activate(T& me, const std::string& name) const + { + if(!id) + { + const_cast(this)->id = CreateId(name); + if constexpr(detail::has_StartMemoryTracing::value) + me.StartMemoryTracing(); + } + else + SetName(name); + } + + void SetName(const std::string& name) const + { + names[id] = name; + } + + + static const std::vector & GetNames() { return names; } + static const std::vector & GetParents() { return parents; } +#else // NETGEN_TRACE_MEMORY + public: + MemoryTracer() {} + MemoryTracer( std::string name ) {} + template + MemoryTracer( std::string name, TRest & ... 
) {} + + void Alloc(size_t size) const {} + void Free(size_t size) const {} + void Swap(...) const {} + int GetId() const { return 0; } + + template + void Track(TRest&...) const {} + + static std::string GetName(int id) { return ""; } + std::string GetName() const { return ""; } + void SetName(std::string name) const {} +#endif // NETGEN_TRACE_MEMORY + }; } // namespace ngcore // Helper macro to easily add multiple timers in a function for profiling diff --git a/libsrc/core/python_ngcore_export.cpp b/libsrc/core/python_ngcore_export.cpp index 4f93168e..abdceb0e 100644 --- a/libsrc/core/python_ngcore_export.cpp +++ b/libsrc/core/python_ngcore_export.cpp @@ -247,23 +247,28 @@ threads : int ; py::class_(m, "PajeTrace") - .def(py::init( [] (string filename, size_t size_mb, bool threads, bool thread_counter) + .def(py::init( [] (string filename, size_t size_mb, bool threads, bool thread_counter, bool memory) { PajeTrace::SetMaxTracefileSize(size_mb*1014*1024); PajeTrace::SetTraceThreads(threads); + PajeTrace::SetTraceMemory(memory); PajeTrace::SetTraceThreadCounter(thread_counter); trace = new PajeTrace(TaskManager::GetMaxThreads(), filename); return trace; }), py::arg("filename")="ng.trace", py::arg("size")=1000, py::arg("threads")=true, py::arg("thread_counter")=false, + py::arg("memory")=true, "size in Megabytes" ) .def("__enter__", [](PajeTrace & self) { }) .def("__exit__", [](PajeTrace & self, py::args) { self.StopTracing(); }) .def("__del__", [](PajeTrace & self) { trace = nullptr; }) - .def("SetTraceThreads", &PajeTrace::SetTraceThreads) - .def("SetTraceThreadCounter", &PajeTrace::SetTraceThreadCounter) - .def("SetMaxTracefileSize", &PajeTrace::SetMaxTracefileSize) + .def_static("SetTraceThreads", &PajeTrace::SetTraceThreads) + .def_static("SetTraceThreadCounter", &PajeTrace::SetTraceThreadCounter) + .def_static("SetMaxTracefileSize", &PajeTrace::SetMaxTracefileSize) +#ifdef NETGEN_TRACE_MEMORY + .def_static("WriteMemoryChart", [](string filename){ if(trace) 
trace->WriteMemoryChart(filename); }, py::arg("filename")="memory" ) +#endif // NETGEN_TRACE_MEMORY ; diff --git a/libsrc/core/table.hpp b/libsrc/core/table.hpp index 8db44f9d..7471b6a3 100644 --- a/libsrc/core/table.hpp +++ b/libsrc/core/table.hpp @@ -159,6 +159,7 @@ namespace ngcore NETGEN_INLINE Table (Table && tab2) : FlatTable(0, nullptr, nullptr) { + tab2.mt.Free(tab2.GetMemUsage()); Swap (size, tab2.size); Swap (index, tab2.index); Swap (data, tab2.data); @@ -166,6 +167,7 @@ namespace ngcore NETGEN_INLINE Table & operator= (Table && tab2) { + mt.Swap(GetMemUsage(), tab2.mt, tab2.GetMemUsage()); Swap (size, tab2.size); Swap (index, tab2.index); Swap (data, tab2.data); @@ -177,6 +179,7 @@ namespace ngcore /// Delete data NETGEN_INLINE ~Table () { + mt.Free(GetMemUsage()); delete [] data; delete [] index; } @@ -188,6 +191,16 @@ namespace ngcore NETGEN_INLINE size_t NElements() const { return index[size]; } using FlatTable::operator[]; + + /* the argument is unused; it is defaulted so that the argument-less call issued by MemoryTracer::Activate (and probed by detail::has_StartMemoryTracing) resolves to this function and the memory already held gets registered */ NETGEN_INLINE void StartMemoryTracing (int /* mem_id */ = 0) + { + mt.Alloc(GetMemUsage()); + } + const MemoryTracer& GetMemoryTracer() const { return mt; } + + private: + size_t GetMemUsage() const { return size == 0 ? 
0 : sizeof(T)*index[size] + sizeof(IndexType) * (size+1); } /* index stores size+1 offsets (index[size] is read by NElements), so the index array contributes (size+1) entries */ + MemoryTracer mt; }; diff --git a/libsrc/core/taskmanager.cpp b/libsrc/core/taskmanager.cpp index be345321..a1049a1c 100644 --- a/libsrc/core/taskmanager.cpp +++ b/libsrc/core/taskmanager.cpp @@ -160,7 +160,7 @@ namespace ngcore static int cnt = 0; if (use_paje_trace) - trace = new PajeTrace(num_threads, "ng" + ToString(cnt++) + ".trace"); + trace = new PajeTrace(num_threads, "ng" + ToString(cnt++)); } diff --git a/libsrc/meshing/boundarylayer.cpp b/libsrc/meshing/boundarylayer.cpp index 07bc8dca..3fe25adc 100644 --- a/libsrc/meshing/boundarylayer.cpp +++ b/libsrc/meshing/boundarylayer.cpp @@ -90,6 +90,9 @@ namespace netgen void GenerateBoundaryLayer(Mesh& mesh, const BoundaryLayerParameters& blp) { + static Timer timer("Create Boundarylayers"); + RegionTimer regt(timer); + int max_edge_nr = -1; for(const auto& seg : mesh.LineSegments()) if(seg.edgenr > max_edge_nr) diff --git a/libsrc/meshing/delaunay.cpp b/libsrc/meshing/delaunay.cpp index adc64712..cd675ef3 100644 --- a/libsrc/meshing/delaunay.cpp +++ b/libsrc/meshing/delaunay.cpp @@ -772,6 +772,7 @@ namespace netgen // improve delaunay - mesh by swapping !!!! Mesh tempmesh; + tempmesh.GetMemoryTracer().SetName("delaunay-tempmesh"); for (auto & meshpoint : mesh.Points()) tempmesh.AddPoint (meshpoint); diff --git a/libsrc/meshing/meshclass.hpp b/libsrc/meshing/meshclass.hpp index 1de31c97..aab4e87a 100644 --- a/libsrc/meshing/meshclass.hpp +++ b/libsrc/meshing/meshclass.hpp @@ -925,6 +925,15 @@ namespace netgen shared_ptr Mirror( netgen::Point<3> p, Vec<3> n ); + private: + MemoryTracer mem_tracer = {"Mesh", + points, "points", + segments, "segments", + surfelements, "surfelements", + volelements, "volelements" + }; + public: + /* const-qualified like the Array/BitArray/Table accessors so it is usable on const meshes as well */ const MemoryTracer & GetMemoryTracer() const { return mem_tracer; } }; inline ostream& operator<<(ostream& ost, const Mesh& mesh)