15 #ifdef _PROFILER_WITH_PAPI_ 20 #if __cplusplus >= 201103L // C++11 supported? 22 static __inline__
unsigned long long rdtsc(
void)
24 return(std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count());
27 #elif defined(__i386) || defined(__x86_64__) 29 static __inline__
unsigned long long rdtsc(
void)
32 __asm__ __volatile__ (
38 return ((
unsigned long long)lo) | (((
unsigned long long)hi) << 32);
43 static __inline__
unsigned long long rdtsc(
void)
46 __asm__ __volatile__ (
"mov %0=ar.itc" :
"=r" (r) ::
"memory");
50 #elif defined(__i386__) 52 static __inline__
unsigned long long rdtsc(
void)
54 unsigned long long int x;
55 __asm__
volatile (
".byte 0x0f, 0x31" :
"=A" (
x));
59 #elif defined(__powerpc__) 60 static __inline__
unsigned long long rdtsc(
void)
62 unsigned long long int result=0;
63 unsigned long int upper, lower,tmp;
71 :
"=r"(upper),
"=r"(lower),
"=r"(tmp)
75 result = result|lower;
99 incl = excl = incl_dev = excl_dev = 0.0;
128 incl_mean = incl_stdev = excl_min = excl_max = 0.0;
129 excl_mean = excl_stdev = call_mean = call_stdev = 0.0;
130 incl_minrank = incl_maxrank = excl_minrank = 0;
131 excl_maxrank = call_max = call_min = call_maxrank = 0;
133 incl_min = incl_max = 0.0;
143 std::vector<double> sstats[18];
164 #ifdef _PROFILER_WITH_PAPI_ 175 : _id(0),_exclusive(0.),_inclusive(0.),_timestamp(0.)
182 : _id(i),_exclusive(0.),_inclusive(0.),_timestamp(0.)
189 : _id(i),_exclusive(0.),_inclusive(0.),_timestamp(0.)
191 #ifdef _PROFILER_WITH_PAPI_ 195 Event(
unsigned int i,
double e,
double it)
196 : _id(i),_exclusive(e),_inclusive(it),_timestamp(0.)
198 #ifdef _PROFILER_WITH_PAPI_ 203 : _id(e._id),_exclusive(e._exclusive),_inclusive(e._inclusive),
204 _timestamp(e._timestamp)
206 #ifdef _PROFILER_WITH_PAPI_ 208 _inc_hwc =
new long long [_nhwc];
209 _exc_hwc =
new long long [_nhwc];
213 #ifdef _PROFILER_WITH_PAPI_ 229 #ifdef _PROFILER_WITH_PAPI_ 232 _inc_hwc =
new long long [_nhwc];
233 _exc_hwc =
new long long [_nhwc];
// Copy each per-counter value from e. The bound is the member _nhwc, the
// same size used to allocate both arrays just above; the previous spelling
// (_nwhc) matched no name used anywhere else in this code.
235 for(
int i = 0;i < _nhwc;i++){
236 _inc_hwc[i] = e._inc_hwc[i];
237 _exc_hwc[i] = e._exc_hwc[i];
278 inline unsigned int &
id()
282 inline unsigned int id()
const 286 inline void id(
unsigned int i)
// Read-only accessor (PAPI builds only): returns the stored _nhwc value.
// The qualifier was misspelled "comst", which does not compile when
// _PROFILER_WITH_PAPI_ is defined.
294 #ifdef _PROFILER_WITH_PAPI_ 297 int nhwc() const {
return (_nhwc); };
298 void update_hwc(
long long *,
long long *);
299 long long &inc_hwc(
int i);
300 long long inc_hwc(
int i)
const;
301 long long &exc_hwc(
int i);
302 long long exc_hwc(
int i)
const;
318 #ifdef _PAPI_ENABLED_ 321 {
return(PAPIInit(PAPI_CURRENT_VERSION));};
324 ResetCounters(){
return(PAPI_reset(papiEventSet) == PAPI_OK); };
327 Count(
size_t *counterValues)
329 return(PAPI_read(papiEventSet,counterValues) == PAPI_OK);
333 AccumulateCounters(
size_t *counterValues)
335 return(PAPI_accum(papiEventSet,counterValues) == PAPI_OK);
353 return(rdtsc()*1e-9);
364 typedef std::map<unsigned int,cumulative_stats>
StatMap;
365 typedef std::list<std::pair<unsigned int,StatMap> >
PStatList;
366 typedef std::map<unsigned int,parallel_stats>
PStatMap;
368 typedef std::list<std::pair<unsigned int,std::list<Event> > >
PEventList;
369 typedef std::map<std::string,unsigned int>
FunctionMap;
// No-op stub: ignores both name and id and always returns 0.
379 int Init(
const std::string &name,
int id){
return 0;};
// No-op stub: writes nothing to Ostr and always returns 0.
386 int Dump(std::ostream &Ostr){
return 0;};
439 int Init(
const std::string &name,
int id);
447 int FunctionEntry(
const std::string &name);
455 int FunctionEntry(
int id);
464 int FunctionExit(
const std::string &name);
473 int FunctionExit(
int id);
481 int FunctionExitAll();
486 int Dump(std::ostream &Ostr);
// Stores the given stream pointer in Out (the stream for regular output).
491 void SetOut(std::ostream *Os){Out = Os;};
// Stores the given stream pointer in Err (the stream for errors).
496 void SetErr(std::ostream *Oe){Err = Oe;};
501 void SummarizeSerialExecution(std::ostream &Ostr);
506 void WriteEventFile();
511 void DumpEvents(std::ostream &Ostr);
521 int Finalize(
bool writeFiles =
true);
526 int ReadConfig(
const std::string &fname);
531 int ReadEventsFromFile(
const std::string &filename);
536 int ReadParallelEventFiles(
const std::vector<std::string> &infiles,
537 PEventList &par_event_list);
542 int SummarizeParallelExecution(std::ostream &Ostr,
544 PEventList &parallel_event_list);
548 int ReadSummaryFiles(
const std::vector<std::string> &input_files,
549 ScalaMap &scala_map);
554 int PopulateScalaMap(ScalaMap &scala_map,
555 ScalaStatMap &scala_statmap,
560 int ScalabilitySummary(ScalaStatMap &scala_statmap,std::ostream &Out);
double _inclusive
tree time
std::list< std::pair< unsigned int, StatMap > > PStatList
std::map< std::string, unsigned int > FunctionMap
construct name to unique id.
int FunctionEntry(const std::string &name)
std::ostream * Out
stream for regular output
bool _initd
whether the profiler has been initialized
int Init(const std::string &name, int id)
bool FinalizeReady()
Ready to finalize?
bool operator<(const Event &e) const
bool _finalized
whether the profiler has been finalized
Performance profiling object.
std::map< unsigned int, parallel_stats > PStatMap
std::ostream * Err
stream for errors
double _timestamp
raw timestamp
std::list< Event > event_list
completed events
int Dump(std::ostream &Ostr)
unsigned int verblevel
verbosity level
Event & operator=(const Event &e)
void SetErr(std::ostream *Oe)
Set errstream.
Event(unsigned int i, double e, double it)
Defines MPI-specific parallel global and program classes.
unsigned int call_minrank
FunctionMap function_map
map from construct name to unique id
unsigned int _id
unique identifier
unsigned int call_maxrank
unsigned int excl_maxrank
void SetOut(std::ostream *Os)
Set outstream.
void const size_t const size_t const size_t const double const double * x
unsigned int profiler_rank
parallel processor id
ConfigMap configmap
map from unique id to construct name
std::ostream & operator<<(std::ostream &ost, const Event &e)
EVENT's stream operator.
static double Time()
Simple timer.
Event(unsigned int i, double ts)
bool _strict
mismatch is error - otherwise automatically exit children
double _exclusive
self time
std::map< unsigned int, scalability_stats > ScalaStatMap
Marks construct entry/exit.
std::list< Event > open_event_list
construct entry events
std::map< unsigned int, std::string > ConfigMap
unique id to construct name.
std::istream & operator>>(std::istream &ist, Event &e)
EVENT's stream operator.
unsigned int incl_maxrank
unsigned int nfunc
total number of constructs profiled
std::vector< unsigned int > nprocs
unsigned int excl_minrank
std::list< std::pair< unsigned int, std::list< Event > > > PEventList
std::map< unsigned int, PStatMap > ScalaMap
std::map< unsigned int, cumulative_stats > StatMap
int FunctionExit(const std::string &name)
int FunctionEntry(int id)
double time0
creation/init time
unsigned int incl_minrank