#include #include #include #include #include #include #include "archive.hpp" // for Demangle #include "paje_trace.hpp" static constexpr int MAX_TRACE_LINE_SIZE = 50; extern const char *header; namespace ngcore { // Produce no traces by default size_t PajeTrace::max_tracefile_size = 11110000; // If true, produce variable counting active threads // increases trace by a factor of two bool PajeTrace::trace_thread_counter = true; bool PajeTrace::trace_threads = true; PajeTrace :: PajeTrace(int anthreads, std::string aname) { start_time = GetTimeCounter(); nthreads = anthreads; tracefile_name = std::move(aname); int bytes_per_event=33; max_num_events_per_thread = std::min( static_cast(std::numeric_limits::max()), max_tracefile_size/bytes_per_event/(2*nthreads+1)*10/7); if(max_num_events_per_thread>0) { logger->info( "Tracefile size = {}MB", max_tracefile_size/1024/1024); logger->info( "Tracing {} events per thread", max_num_events_per_thread , " events per thread"); } tasks.resize(nthreads); int reserve_size = std::min(1000000U, max_num_events_per_thread); for(auto & t : tasks) t.reserve(reserve_size); links.resize(nthreads); for(auto & l : links) l.reserve(reserve_size); jobs.reserve(reserve_size); timer_events.reserve(reserve_size); tracing_enabled = true; } PajeTrace :: ~PajeTrace() { if(!tracefile_name.empty()) Write(tracefile_name); } void PajeTrace::StopTracing() { if(tracing_enabled && max_num_events_per_thread>0) { logger->warn("Maximum number of traces reached, tracing is stopped now."); } tracing_enabled = false; } class PajeFile { public: static void Hue2RGB ( double x, double &r, double &g, double &b ) { double d = 1.0/6.0; if(x logger = GetLogger("PajeTrace"); double ConvertTime(TTimePoint t) { // return time in milliseconds as double // return std::chrono::duration(t-start_time).count()*1000.0; // return std::chrono::duration(t-start_time).count() / 2.7e3; return (t-start_time) / 2.7e6; } enum PType { SET_VARIABLE=1, ADD_VARIABLE, SUB_VARIABLE, PUSH_STATE, POP_STATE, START_LINK, STOP_LINK }; struct PajeEvent { PajeEvent( int aevent_type, double atime, int atype, int acontainer, double avar_value ) : time(atime), var_value(avar_value), event_type(aevent_type), type(atype), container(acontainer) { } PajeEvent( int aevent_type, double atime, int atype, int acontainer, int avalue = 0, int aid = 0, bool avalue_is_alias = true ) : time(atime), event_type(aevent_type), type(atype), container(acontainer), value(avalue), id(aid), value_is_alias(avalue_is_alias) { } PajeEvent( int aevent_type, double atime, int atype, int acontainer, int avalue, int astart_container, int akey ) : time(atime), event_type(aevent_type), type(atype), container(acontainer), value(avalue), start_container(astart_container), id(akey) { } double time; double var_value = 0.0; int event_type; int type; int container; int value = 0; int start_container = 0; int id = 0; bool value_is_alias = true; bool operator < (const PajeEvent & other) const { // Same start and stop times can occur for very small tasks -> take "starting" events first (eg. PajePushState before PajePopState) if(time == other.time) return event_type < other.event_type; return (time < other.time); } int write(FILE *stream) { const int &key = id; const int &end_container = start_container; switch(event_type) { case PajeSetVariable: return fprintf( stream, "%d\t%.15g\ta%d\ta%d\t%.15g\n", PajeSetVariable, time, type, container, var_value ); // NOLINT case PajeAddVariable: return fprintf( stream, "%d\t%.15g\ta%d\ta%d\t%.15g\n", PajeAddVariable, time, type, container, var_value ); // NOLINT case PajeSubVariable: return fprintf( stream, "%d\t%.15g\ta%d\ta%d\t%.15g\n", PajeSubVariable, time, type, container, var_value ); // NOLINT case PajePushState: if(value_is_alias) return fprintf( stream, "%d\t%.15g\ta%d\ta%d\ta%d\t%d\n", PajePushState, time, type, container, value, id); // NOLINT else return fprintf( stream, "%d\t%.15g\ta%d\ta%d\t%d\t%d\n", PajePushState, time, type, container, value, id); // NOLINT case PajePopState: return fprintf( stream, "%d\t%.15g\ta%d\ta%d\n", PajePopState, time, type, container ); // NOLINT case PajeStartLink: return fprintf( stream, "%d\t%.15g\ta%d\ta%d\t%d\ta%d\t%d\n", PajeStartLink, time, type, container, value, start_container, key ); // NOLINT case PajeEndLink: return fprintf( stream, "%d\t%.15g\ta%d\ta%d\t%d\ta%d\t%d\n", PajeEndLink, time, type, container, value, end_container, key ); // NOLINT } return 0; } }; std::vector events; public: PajeFile() = delete; PajeFile(const PajeFile &) = delete; PajeFile(PajeFile &&) = delete; void operator=(const PajeFile &) = delete; void operator=(PajeFile &&) = delete; PajeFile( const std::string & filename, TTimePoint astart_time ) { start_time = astart_time; ctrace_stream = fopen (filename.c_str(),"w"); // NOLINT fprintf(ctrace_stream, "%s", header ); // NOLINT alias_counter = 0; } ~PajeFile() { fclose (ctrace_stream); // NOLINT } int DefineContainerType ( int parent_type, const std::string & name ) { int alias = ++alias_counter; if(parent_type!=0) fprintf( ctrace_stream, "%d\ta%d\ta%d\t\"%s\"\n", PajeDefineContainerType, alias, parent_type, name.c_str() ); // NOLINT else fprintf( ctrace_stream, "%d\ta%d\t%d\t\"%s\"\n", PajeDefineContainerType, alias, parent_type, name.c_str() ); // NOLINT return alias; } int DefineVariableType ( int container_type, const std::string & name ) { int alias = ++alias_counter; fprintf( ctrace_stream, "%d\ta%d\ta%d\t\"%s\"\t\"1.0 1.0 1.0\"\n", PajeDefineVariableType, alias, container_type, name.c_str() ); // NOLINT return alias; } int DefineStateType ( int type, const std::string & name ) { int alias = ++alias_counter; fprintf( ctrace_stream, "%d\ta%d\ta%d\t\"%s\"\n", PajeDefineStateType, alias, type, name.c_str() ); // NOLINT return alias; } // int DefineEventType () // { // Write("event not implemented"); // } int DefineLinkType (int parent_container_type, int start_container_type, int stop_container_type, const std::string & name) { int alias = ++alias_counter; fprintf( ctrace_stream, "%d\ta%d\ta%d\ta%d\ta%d\t\"%s\"\n", PajeDefineLinkType, alias, parent_container_type, start_container_type, stop_container_type, name.c_str() ); // NOLINT return alias; } int DefineEntityValue (int type, const std::string & name, double hue = -1) { if(hue==-1) { std::hash shash; size_t h = shash(name); h ^= h>>32U; h = static_cast(h); hue = h*1.0/std::numeric_limits::max(); } int alias = ++alias_counter; double r,g,b; Hue2RGB( hue, r, g, b ); fprintf( ctrace_stream, "%d\ta%d\ta%d\t\"%s\"\t\"%.15g %.15g %.15g\"\n", PajeDefineEntityValue, alias, type, name.c_str(), r,g,b ); // NOLINT return alias; } int CreateContainer ( int type, int parent, const std::string & name ) { int alias = ++alias_counter; if(parent!=0) fprintf( ctrace_stream, "%d\t0\ta%d\ta%d\ta%d\t\"%s\"\n", PajeCreateContainer, alias, type, parent, name.c_str() ); // NOLINT else fprintf( ctrace_stream, "%d\t0\ta%d\ta%d\t%d\t\"%s\"\n", PajeCreateContainer, alias, type, parent, name.c_str() ); // NOLINT return alias; } void DestroyContainer () {} void SetVariable (TTimePoint time, int type, int container, double value ) { events.emplace_back( PajeEvent( PajeSetVariable, ConvertTime(time), type, container, value ) ); } void AddVariable (TTimePoint time, int type, int container, double value ) { events.emplace_back( PajeEvent( PajeAddVariable, ConvertTime(time), type, container, value ) ); } void SubVariable (TTimePoint time, int type, int container, double value ) { events.emplace_back( PajeEvent( PajeSubVariable, ConvertTime(time), type, container, value ) ); } void SetState () {} void PushState ( TTimePoint time, int type, int container, int value, int id = 0, bool value_is_alias = true ) { events.emplace_back( PajeEvent( PajePushState, ConvertTime(time), type, container, value, id, value_is_alias) ); } void PopState ( TTimePoint time, int type, int container ) { events.emplace_back( PajeEvent( PajePopState, ConvertTime(time), type, container ) ); } void ResetState () {} void StartLink ( TTimePoint time, int type, int container, int value, int start_container, int key ) { events.emplace_back( PajeEvent( PajeStartLink, ConvertTime(time), type, container, value, start_container, key ) ); } void EndLink ( TTimePoint time, int type, int container, int value, int end_container, int key ) { events.emplace_back( PajeEvent( PajeEndLink, ConvertTime(time), type, container, value, end_container, key ) ); } void NewEvent () {} void WriteEvents() { logger->info("Sorting traces..."); std::sort (events.begin(), events.end()); logger->info("Writing traces... "); for (auto & event : events) { event.write( ctrace_stream ); // fprintf( ctrace_stream, "%s", buf ); // NOLINT } logger->info("Done"); } private: enum { PajeDefineContainerType = 0, PajeDefineVariableType = 1, PajeDefineStateType = 2, PajeDefineEventType = 3, PajeDefineLinkType = 4, PajeDefineEntityValue = 5, PajeCreateContainer = 6, PajeDestroyContainer = 7, PajeSetVariable = 8, PajeAddVariable = 9, PajeSubVariable = 10, PajeSetState = 11, PajePushState = 12, PajePopState = 13, PajeResetState = 14, PajeStartLink = 15, PajeEndLink = 16, PajeNewEvent = 17 }; }; NGCORE_API PajeTrace *trace; void PajeTrace::Write( const std::string & filename ) { int n_events = jobs.size() + timer_events.size(); for(auto & vtasks : tasks) n_events += vtasks.size(); logger->info("{} events traced", n_events); if(n_events==0) { logger->info("No data traced, skip writing trace file"); return; } if(!tracing_enabled) { logger->warn("Tracing stopped during computation due to tracefile size limit of {} megabytes.", max_tracefile_size/1024/1024); } PajeFile paje(filename, start_time); const int container_type_task_manager = paje.DefineContainerType( 0, "Task Manager" ); const int container_type_node = paje.DefineContainerType( container_type_task_manager, "Node"); const int container_type_thread = paje.DefineContainerType( container_type_task_manager, "Thread"); const int container_type_timer = container_type_thread; //paje.DefineContainerType( container_type_task_manager, "Timers"); const int container_type_jobs = paje.DefineContainerType( container_type_task_manager, "Jobs"); const int state_type_job = paje.DefineStateType( container_type_jobs, "Job" ); const int state_type_task = paje.DefineStateType( container_type_thread, "Task" ); const int state_type_timer = paje.DefineStateType( container_type_timer, "Timer state" ); const int variable_type_active_threads = paje.DefineVariableType( container_type_jobs, "Active threads" ); const int container_task_manager = paje.CreateContainer( container_type_task_manager, 0, "The task manager" ); const int container_jobs = paje.CreateContainer( container_type_jobs, container_task_manager, "Jobs" ); paje.SetVariable( start_time, variable_type_active_threads, container_jobs, 0.0 ); const int num_nodes = 1; //task_manager ? task_manager->GetNumNodes() : 1; std::vector container_nodes; container_nodes.reserve(num_nodes); for(int i=0; i thread_aliases; thread_aliases.reserve(nthreads); if(trace_threads) for (int i=0; i job_map; std::map job_task_map; for(Job & j : jobs) if(job_map.find(j.type) == job_map.end()) { std::string name = Demangle(j.type->name()); job_map[j.type] = paje.DefineEntityValue( state_type_job, name, -1 ); job_task_map[j.type] = paje.DefineEntityValue( state_type_task, name, -1 ); } for(Job & j : jobs) { paje.PushState( j.start_time, state_type_job, container_jobs, job_map[j.type] ); paje.PopState( j.stop_time, state_type_job, container_jobs ); } std::set timer_ids; std::map timer_aliases; for(auto & event : timer_events) timer_ids.insert(event.timer_id); for(auto & vtasks : tasks) for (Task & t : vtasks) if(t.id_type==Task::ID_TIMER) timer_ids.insert(t.id); for(auto id : timer_ids) timer_aliases[id] = paje.DefineEntityValue( state_type_timer, "a timer" /* TODO: NgProfiler::GetName(id).c_str()*/, -1 ); int timerdepth = 0; int maxdepth = 0; for(auto & event : timer_events) { if(event.is_start) { timerdepth++; maxdepth = timerdepth>maxdepth ? timerdepth : maxdepth; } else timerdepth--; } std::vector timer_container_aliases; timer_container_aliases.resize(maxdepth); for(int i=0; i links_merged; links_merged.reserve(nlinks); std::vector pos(nthreads); int nlinks_merged = 0; while(nlinks_merged < nlinks) { int minpos = -1; TTimePoint mintime = -1; for (int t = 0; t started_links; int link_type = paje.DefineLinkType(container_type_node, container_type_thread, container_type_thread, "links"); // match links for ( auto & l : links_merged ) { if(l.is_start) { started_links.push_back(l); } else { unsigned int i = 0; while(i