PlasCom2  1.0
XPACC Multi-physics simulation application
Profiler.H
Go to the documentation of this file.
1 #ifndef _PROFILER_H_
7 #define _PROFILER_H_
8 #include <map>
9 #include <list>
10 #include <vector>
11 #include <string>
12 #include <iostream>
13 #include <sys/time.h>
14 //#include <cstdint>
15 #ifdef _PROFILER_WITH_PAPI_
16 #include <papi.h>
17 #endif
18 
19 
20 #if __cplusplus >= 201103L // C++11 supported?
21 #include <chrono>
/// Portable timestamp source used when C++11 <chrono> is available.
///
/// Despite the name, this variant does not read a hardware cycle counter:
/// it returns the time since the epoch of std::chrono::high_resolution_clock,
/// expressed in nanoseconds.
///
/// @return nanoseconds since the clock's epoch.
static __inline__ unsigned long long rdtsc(void)
{
  using namespace std::chrono;
  const high_resolution_clock::duration sinceEpoch =
    high_resolution_clock::now().time_since_epoch();
  return(duration_cast<nanoseconds>(sinceEpoch).count());
}
26 
27 #elif defined(__i386) || defined(__x86_64__)
28 
/// Read the x86 time-stamp counter.
///
/// `cpuid` (leaf 0) is issued immediately before `rdtsc` -- presumably as a
/// serializing barrier; confirm against the CPU manual. `rdtsc` leaves the
/// low 32 bits in EAX and the high 32 bits in EDX; the two halves are
/// combined into one 64-bit value.
///
/// @return the 64-bit counter value (raw ticks, not nanoseconds).
static __inline__ unsigned long long rdtsc(void)
{
  unsigned int tscLow, tscHigh;
  __asm__ __volatile__ (
    "cpuid \n"
    "rdtsc"
    : "=a"(tscLow), "=d"(tscHigh) /* outputs */
    : "a"(0)                      /* inputs */
    : "%ebx", "%ecx");            /* clobbers */
  const unsigned long long upperHalf =
    static_cast<unsigned long long>(tscHigh) << 32;
  return static_cast<unsigned long long>(tscLow) | upperHalf;
}
40 
41 #elif defined(__ia64)
42 
43 static __inline__ unsigned long long rdtsc(void)
44 {
45  unsigned long long r;
46  __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (r) :: "memory");
47  return r;
48 }
49 
50 #elif defined(__i386__)
51 
/// Read the time-stamp counter on 32-bit x86 by emitting the raw `rdtsc`
/// opcode bytes (0x0f 0x31); the "=A" constraint binds the result to the
/// EDX:EAX register pair.
///
/// NOTE(review): this branch is guarded by `__i386__`, but an earlier branch
/// of the same #if chain already tests `__i386`; confirm whether this
/// variant can ever be selected.
///
/// @return the raw counter value.
static __inline__ unsigned long long rdtsc(void)
{
  unsigned long long int tsc;
  __asm__ volatile (".byte 0x0f, 0x31" : "=A" (tsc));
  return tsc;
}
58 
59 #elif defined(__powerpc__)
60 static __inline__ unsigned long long rdtsc(void)
61 {
62  unsigned long long int result=0;
63  unsigned long int upper, lower,tmp;
64  __asm__ volatile(
65  "0: \n"
66  "\tmftbu %0 \n"
67  "\tmftb %1 \n"
68  "\tmftbu %2 \n"
69  "\tcmpw %2,%0 \n"
70  "\tbne 0b \n"
71  : "=r"(upper),"=r"(lower),"=r"(tmp)
72  );
73  result = upper;
74  result = result<<32;
75  result = result|lower;
76 
77  return(result);
78 }
79 #endif
80 
81 namespace ix {
82 
87  namespace profiler {
88 
/// Accumulated timing statistics for one profiled construct.
///
/// The struct header and the constructor declarator were missing from this
/// listing; the type name is the one used by the StatMap typedef.
/// NOTE(review): field meanings below are inferred from their names --
/// confirm against Profiler.C.
struct cumulative_stats {
  double incl;         ///< inclusive time (presumably summed over calls)
  double excl;         ///< exclusive time (presumably summed over calls)
  unsigned int ncalls; ///< number of calls
  double incl_dev;     ///< deviation of the inclusive time
  double excl_dev;     ///< deviation of the exclusive time

  /// Zero every field.
  cumulative_stats()
    : incl(0.0), excl(0.0), ncalls(0), incl_dev(0.0), excl_dev(0.0)
  {
  }
};
103 
104 
/// Utility struct holding per-construct statistics gathered across
/// parallel ranks: min / max (with the rank that produced each), mean and
/// standard deviation, for inclusive time, exclusive time and call count.
///
/// The constructor declarator was missing from this listing and is
/// restored here; it zeroes every field, as the original body did.
/// NOTE(review): field meanings are inferred from their names -- confirm
/// against Profiler.C.
struct parallel_stats {
  double incl_min;
  double incl_max;
  unsigned int incl_minrank;
  unsigned int incl_maxrank;
  double incl_mean;
  double incl_stdev;
  double excl_min;
  double excl_max;
  unsigned int excl_minrank;
  unsigned int excl_maxrank;
  double excl_mean;
  double excl_stdev;
  double call_mean;
  unsigned int call_max;
  unsigned int call_min;
  unsigned int call_maxrank;
  unsigned int call_minrank;
  double call_stdev;

  /// Zero every field (initializers listed in declaration order).
  parallel_stats()
    : incl_min(0.0), incl_max(0.0),
      incl_minrank(0), incl_maxrank(0),
      incl_mean(0.0), incl_stdev(0.0),
      excl_min(0.0), excl_max(0.0),
      excl_minrank(0), excl_maxrank(0),
      excl_mean(0.0), excl_stdev(0.0),
      call_mean(0.0),
      call_max(0), call_min(0),
      call_maxrank(0), call_minrank(0),
      call_stdev(0.0)
  {
  }
};
136 
/// Series of statistics collected over runs at different processor counts.
///
/// The struct header was missing from this listing; the type name is the
/// one used by the ScalaStatMap typedef.
struct scalability_stats
{
  /// processor counts, presumably one entry per run -- confirm
  std::vector<unsigned int> nprocs;
  /// 18 series of values; 18 equals the number of fields in parallel_stats,
  /// so presumably one series per field -- TODO confirm the index mapping
  std::vector<double> sstats[18];
};
145 
/// Marks construct entry/exit.
///
/// An Event carries the unique id of the profiled construct, its exclusive
/// ("self") and inclusive ("tree") times and a raw timestamp. Events are
/// ordered by timestamp (operator<).
///
/// When _PROFILER_WITH_PAPI_ is defined the event additionally owns two
/// arrays of hardware-counter values (inclusive / exclusive), each of
/// length _nhwc. The arrays are owned only while _hwc is true.
class Event {
  friend std::ostream &operator<<(std::ostream &,const Event &);
  friend std::istream &operator>>(std::istream &,Event &);
protected:
  /// unique identifier
  unsigned int _id;
  /// self time
  double _exclusive;
  /// tree time
  double _inclusive;
  /// raw timestamp
  double _timestamp;
#ifdef _PROFILER_WITH_PAPI_
  /// number of hardware counters
  int _nhwc;
  /// inclusive hardware-counter values (long long, matching the
  /// inc_hwc()/update_hwc() signatures and the new[] expressions)
  long long *_inc_hwc;
  /// exclusive hardware-counter values
  long long *_exc_hwc;
  /// true while the counter arrays above are allocated and owned
  bool _hwc;

  /// Put the counter state into a well-defined "no counters" state so the
  /// destructor never inspects indeterminate members.
  void clear_hwc()
  {
    _nhwc = 0;
    _inc_hwc = 0;
    _exc_hwc = 0;
    _hwc = false;
    papiEventSet = PAPI_NULL;
  }

  /// Deep-copy the counter state of e; any previously owned arrays must
  /// already have been released by the caller.
  void clone_hwc(const Event &e)
  {
    clear_hwc();
    papiEventSet = e.papiEventSet;
    if(e._hwc){
      _nhwc = (e._nhwc > 0) ? e._nhwc : 0;
      _inc_hwc = new long long [_nhwc];
      _exc_hwc = new long long [_nhwc];
      for(int i = 0;i < _nhwc;i++){
        _inc_hwc[i] = e._inc_hwc[i];
        _exc_hwc[i] = e._exc_hwc[i];
      }
      _hwc = true;
    }
  }
#endif
public:
  /// Default event: id 0 and all times zero.
  /// init_papi() is not invoked here (the call is disabled in the source).
  Event()
    : _id(0),_exclusive(0.),_inclusive(0.),_timestamp(0.)
  {
#ifdef _PROFILER_WITH_PAPI_
    clear_hwc();
#endif
  }

  /// Event for construct i with all times zero.
  /// init_papi() is not invoked here (the call is disabled in the source).
  Event(unsigned int i)
    : _id(i),_exclusive(0.),_inclusive(0.),_timestamp(0.)
  {
#ifdef _PROFILER_WITH_PAPI_
    clear_hwc();
#endif
  }

  /// Event for construct i stamped with ts.
  /// (The timestamp argument used to be ignored and the stamp left at 0.)
  Event(unsigned int i,double ts)
    : _id(i),_exclusive(0.),_inclusive(0.),_timestamp(ts)
  {
#ifdef _PROFILER_WITH_PAPI_
    clear_hwc();
    init_papi();
#endif
  }

  /// Event for construct i with exclusive time e and inclusive time it.
  Event(unsigned int i,double e,double it)
    : _id(i),_exclusive(e),_inclusive(it),_timestamp(0.)
  {
#ifdef _PROFILER_WITH_PAPI_
    clear_hwc();
    init_papi();
#endif
  }

  /// Copy constructor; with PAPI enabled the counter arrays are deep-copied.
  Event(const Event &e)
    : _id(e._id),_exclusive(e._exclusive),_inclusive(e._inclusive),
      _timestamp(e._timestamp)
  {
#ifdef _PROFILER_WITH_PAPI_
    clone_hwc(e);
#endif
  }

#ifdef _PROFILER_WITH_PAPI_
  /// Releases the counter arrays if this event owns them.
  ~Event()
  {
    if(_hwc){
      delete [] _inc_hwc;
      delete [] _exc_hwc;
    }
  }
#endif

  /// Copy assignment; with PAPI enabled the counter arrays are re-allocated
  /// to the size of e and deep-copied. Self-assignment is a no-op there.
  Event &
  operator=(const Event &e)
  {
    _id = e._id;
    _exclusive = e._exclusive;
    _inclusive = e._inclusive;
    _timestamp = e._timestamp;
#ifdef _PROFILER_WITH_PAPI_
    if(this != &e){
      if(_hwc){
        delete [] _inc_hwc;
        delete [] _exc_hwc;
      }
      clone_hwc(e);
    }
#endif
    return(*this);
  }

  /// Mutable access to the exclusive (self) time.
  inline double &exclusive()
  {
    return (_exclusive);
  }
  /// Exclusive (self) time.
  inline double exclusive() const
  {
    return (_exclusive);
  }
  /// Mutable access to the inclusive (tree) time.
  inline double &inclusive()
  {
    return (_inclusive);
  }
  /// Inclusive (tree) time.
  inline double inclusive() const
  {
    return (_inclusive);
  }
  /// Set the exclusive (self) time.
  inline void exclusive(double e)
  {
    _exclusive = e;
  }
  /// Set the inclusive (tree) time.
  inline void inclusive(double i)
  {
    _inclusive = i;
  }
  /// Mutable access to the raw timestamp.
  inline double &timestamp()
  {
    return (_timestamp);
  }
  /// Raw timestamp.
  inline double timestamp() const
  {
    return (_timestamp);
  }
  /// Set the raw timestamp.
  inline void timestamp(double t)
  {
    _timestamp = t;
  }
  /// Mutable access to the construct id.
  inline unsigned int &id()
  {
    return(_id);
  }
  /// Construct id.
  inline unsigned int id() const
  {
    return(_id);
  }
  /// Set the construct id.
  inline void id(unsigned int i)
  {
    _id = i;
  }
  /// Orders events by timestamp.
  inline bool operator<(const Event &e) const
  {
    return(_timestamp < e._timestamp);
  }
#ifdef _PROFILER_WITH_PAPI_
  /// PAPI event-set handle; PAPI_NULL until set up.
  /// NOTE(review): presumably created by init_papi() -- confirm in Profiler.C.
  int papiEventSet;
  /// Defined out of line; presumably sets up the counter state -- confirm.
  void init_papi();
  /// Number of hardware counters.
  int nhwc() const { return (_nhwc); }
  /// Defined out of line.
  void update_hwc(long long *,long long *);
  /// Access to the i-th inclusive counter value (defined out of line).
  long long &inc_hwc(int i);
  long long inc_hwc(int i) const;
  /// Access to the i-th exclusive counter value (defined out of line).
  long long &exc_hwc(int i);
  long long exc_hwc(int i) const;
#endif
};
305 
309  std::ostream &
310  operator<<(std::ostream &ost,const Event &e);
311 
315  std::istream &
316  operator>>(std::istream &ist,Event &e);
317 
318 #ifdef _PAPI_ENABLED_
319  static inline int
320  PAPIInit()
321  {return(PAPIInit(PAPI_CURRENT_VERSION));};
322 
323  static inline int
324  ResetCounters(){ return(PAPI_reset(papiEventSet) == PAPI_OK); };
325 
326  static inline int
327  Count(size_t *counterValues)
328  {
329  return(PAPI_read(papiEventSet,counterValues) == PAPI_OK);
330  };
331 
332  static inline int
333  AccumulateCounters(size_t *counterValues)
334  {
335  return(PAPI_accum(papiEventSet,counterValues) == PAPI_OK);
336  };
337 
338 #endif
339  static inline double
348  {
349 // struct timeval tv;
350 // gettimeofday(&tv,NULL);
351 // double t = tv.tv_sec + tv.tv_usec/1000000.;
352 // return(t);
353  return(rdtsc()*1e-9);
354  }
355 
359  typedef std::map<std::string,unsigned int> FunctionMap;
363  typedef std::map<unsigned int,std::string> ConfigMap;
364  typedef std::map<unsigned int,cumulative_stats> StatMap;
365  typedef std::list<std::pair<unsigned int,StatMap> > PStatList;
366  typedef std::map<unsigned int,parallel_stats> PStatMap;
367  typedef std::map<unsigned int,PStatMap> ScalaMap;
368  typedef std::list<std::pair<unsigned int,std::list<Event> > > PEventList;
369  typedef std::map<std::string,unsigned int> FunctionMap;
370  typedef std::map<unsigned int,scalability_stats> ScalaStatMap;
371 
376 
377  public:
/// No-op initialization; ignores its argument and returns 0.
int Init(int id)
{
  return 0;
}

/// No-op initialization; ignores its arguments and returns 0.
int Init(const std::string &name,int id)
{
  return 0;
}

/// No-op construct entry by name; returns 0.
int FunctionEntry(const std::string &name)
{
  return 0;
}

/// No-op construct entry by id; returns 0.
int FunctionEntry(int id)
{
  return 0;
}

/// No-op construct exit by name; returns 0.
int FunctionExit(const std::string &name)
{
  return 0;
}

/// No-op construct exit by id; returns 0.
int FunctionExit(int id)
{
  return 0;
}

/// No-op; returns 0.
int FunctionExitAll()
{
  return 0;
}

/// No-op finalization; returns 0.
int Finalize()
{
  return 0;
}

/// Writes nothing to Ostr; returns 0.
int Dump(std::ostream &Ostr)
{
  return 0;
}

/// Always reports ready.
bool Ready()
{
  return true;
}
388  };
389 
398  class ProfilerObj {
399  protected:
401  unsigned int profiler_rank;
403  unsigned int verblevel;
405  std::ostream *Out;
407  std::ostream *Err;
409  double time0;
411  std::list<Event> open_event_list;
413  std::list<Event> event_list;
415  FunctionMap function_map;
417  ConfigMap configmap;
419  unsigned int nfunc;
420 
421  public:
422  ProfilerObj();
423 
429  int Init(int id);
430 
439  int Init(const std::string &name,int id);
440 
447  int FunctionEntry(const std::string &name);
448 
455  int FunctionEntry(int id);
456 
464  int FunctionExit(const std::string &name);
465 
473  int FunctionExit(int id);
474 
481  int FunctionExitAll();
482 
486  int Dump(std::ostream &Ostr);
487 
491  void SetOut(std::ostream *Os){Out = Os;};
492 
496  void SetErr(std::ostream *Oe){Err = Oe;};
497 
501  void SummarizeSerialExecution(std::ostream &Ostr);
502 
506  void WriteEventFile();
507 
511  void DumpEvents(std::ostream &Ostr);
512 
516  bool FinalizeReady(){return (open_event_list.size() == 1); };
517 
521  int Finalize(bool writeFiles = true);
522 
526  int ReadConfig(const std::string &fname);
527 
531  int ReadEventsFromFile(const std::string &filename);
532 
536  int ReadParallelEventFiles(const std::vector<std::string> &infiles,
537  PEventList &par_event_list);
538 
542  int SummarizeParallelExecution(std::ostream &Ostr,
543  std::ostream &Ouf,
544  PEventList &parallel_event_list);
548  int ReadSummaryFiles(const std::vector<std::string> &input_files,
549  ScalaMap &scala_map);
550 
554  int PopulateScalaMap(ScalaMap &scala_map,
555  ScalaStatMap &scala_statmap,
556  bool is_scaled);
560  int ScalabilitySummary(ScalaStatMap &scala_statmap,std::ostream &Out);
561 
562  private:
564  bool _strict;
566  bool _initd;
569 
570  };
571  };
572 };
573 
574 #endif
double _inclusive
tree time
Definition: Profiler.H:161
std::list< std::pair< unsigned int, StatMap > > PStatList
Definition: Profiler.H:365
std::map< std::string, unsigned int > FunctionMap
construct name to unique id.
Definition: Profiler.H:359
int FunctionEntry(const std::string &name)
Definition: Profiler.H:380
std::ostream * Out
stream for regular output
Definition: Profiler.H:405
bool _initd
whether the profiler has been initialized
Definition: Profiler.H:566
int Init(const std::string &name, int id)
Definition: Profiler.H:379
void exclusive(double e)
Definition: Profiler.H:258
bool FinalizeReady()
Ready to finalize?
Definition: Profiler.H:516
double & timestamp()
Definition: Profiler.H:266
double inclusive() const
Definition: Profiler.H:254
bool operator<(const Event &e) const
Definition: Profiler.H:290
Utility struct.
Definition: Profiler.H:108
bool _finalized
whether the profiler has been finalized
Definition: Profiler.H:568
Performance profiling object.
Definition: Profiler.H:398
std::map< unsigned int, parallel_stats > PStatMap
Definition: Profiler.H:366
std::ostream * Err
stream for errors
Definition: Profiler.H:407
double & exclusive()
Definition: Profiler.H:242
double _timestamp
raw timestamp
Definition: Profiler.H:163
std::list< Event > event_list
completed events
Definition: Profiler.H:413
int Dump(std::ostream &Ostr)
Definition: Profiler.H:386
unsigned int verblevel
verbosity level
Definition: Profiler.H:403
Event & operator=(const Event &e)
Definition: Profiler.H:223
void SetErr(std::ostream *Oe)
Set errstream.
Definition: Profiler.H:496
Event(unsigned int i, double e, double it)
Definition: Profiler.H:195
Defines MPI-specific parallel global and program classes.
FunctionMap function_map
map from construct name to unique id
Definition: Profiler.H:415
unsigned int _id
unique identifier
Definition: Profiler.H:157
void SetOut(std::ostream *Os)
Set outstream.
Definition: Profiler.H:491
void const size_t const size_t const size_t const double const double * x
unsigned int profiler_rank
parallel processor id
Definition: Profiler.H:401
ConfigMap configmap
map from unique id to construct name
Definition: Profiler.H:417
double & inclusive()
Definition: Profiler.H:250
std::ostream & operator<<(std::ostream &ost, const Event &e)
EVENT's stream operator.
Definition: Profiler.C:131
Event(const Event &e)
Definition: Profiler.H:202
static double Time()
Simple timer.
Definition: Profiler.H:347
Event(unsigned int i, double ts)
Definition: Profiler.H:188
bool _strict
mismatch is error - otherwise automatically exit children
Definition: Profiler.H:564
double _exclusive
self time
Definition: Profiler.H:159
std::map< unsigned int, scalability_stats > ScalaStatMap
Definition: Profiler.H:370
Marks construct entry/exit.
Definition: Profiler.H:152
std::list< Event > open_event_list
construct entry events
Definition: Profiler.H:411
void id(unsigned int i)
Definition: Profiler.H:286
Event(unsigned int i)
Definition: Profiler.H:181
void inclusive(double i)
Definition: Profiler.H:262
unsigned int & id()
Definition: Profiler.H:278
std::map< unsigned int, std::string > ConfigMap
unique id to construct name.
Definition: Profiler.H:363
std::istream & operator>>(std::istream &ist, Event &e)
EVENT's stream operator.
Definition: Profiler.C:140
unsigned int nfunc
total number of constructs profiled
Definition: Profiler.H:419
std::vector< unsigned int > nprocs
Definition: Profiler.H:142
double timestamp() const
Definition: Profiler.H:270
std::list< std::pair< unsigned int, std::list< Event > > > PEventList
Definition: Profiler.H:368
double exclusive() const
Definition: Profiler.H:246
std::map< unsigned int, PStatMap > ScalaMap
Definition: Profiler.H:367
std::map< unsigned int, cumulative_stats > StatMap
Definition: Profiler.H:364
int FunctionExit(const std::string &name)
Definition: Profiler.H:382
void timestamp(double t)
Definition: Profiler.H:274
double time0
creation/init time
Definition: Profiler.H:409
unsigned int id() const
Definition: Profiler.H:282