19 operator-(const struct timeval &t1,
const struct timeval &t2)
22 rtv.tv_usec = t1.tv_usec - t2.tv_usec;
23 rtv.tv_sec = t1.tv_sec - t2.tv_sec;
30 inline struct timeval &
33 t1.tv_usec -= t2.tv_usec;
34 t1.tv_sec -= t2.tv_sec;
42 operator+(const struct timeval &t1,
const struct timeval &t2)
45 rtv.tv_usec = t1.tv_usec + t2.tv_usec;
46 rtv.tv_sec = t1.tv_sec + t2.tv_sec;
53 inline struct timeval &
56 t1.tv_usec += t2.tv_usec;
57 t1.tv_sec += t2.tv_sec;
65 operator==(
const struct timeval &t1,
const struct timeval &t2)
67 return((t1.tv_sec + (t1.tv_usec/1000000.0)) ==
68 (t2.tv_sec + (t2.tv_usec/1000000.0)));
75 operator!=(
const struct timeval &t1,
const struct timeval &t2)
86 return((t1.tv_sec + (t1.tv_usec/1000000.0)) <
87 (t2.tv_sec + (t2.tv_usec/1000000.0)));
94 operator>(
const struct timeval &t1,
const struct timeval &t2)
96 return((t1.tv_sec + (t1.tv_usec/1000000.0)) >
97 (t2.tv_sec + (t2.tv_usec/1000000.0)));
104 operator<=(
const struct timeval &t1,
const struct timeval &t2)
106 return((t1 < t2) || (t1 == t2));
113 operator>=(
const struct timeval &t1,
const struct timeval &t2)
115 return((t1 > t2) || (t1 == t2));
123 operator<(const std::pair<unsigned int,std::list<Event> > &p1,
124 const std::pair<unsigned int,std::list<Event> > &p2)
126 return (p1.first < p2.first);
147 typedef std::map<unsigned int,cumulative_stats>
StatMap;
148 typedef std::list<std::pair<unsigned int,StatMap> >
PStatList;
149 typedef std::map<unsigned int,parallel_stats>
PStatMap;
150 typedef std::map<unsigned int,PStatMap>
ScalaMap;
151 typedef std::list<std::pair<unsigned int,std::list<Event> > >
PEventList;
152 typedef std::map<unsigned int,scalability_stats>
ScalaStatMap;
168 std::cerr <<
"ProfilerObj::Init: Error: already initialized." << std::endl;
172 #ifdef WITH_HPM_TOOLKIT 173 hpmInit((
int)
id,(
char *)
configmap[0].c_str());
175 #ifdef _PROFILER_WITH_PAPI_ 194 begref.timestamp(time0);
200 std::cerr <<
"ProfilerObj::Init: Error: already initialized. Tried to reinit with " 201 << name <<
"." << std::endl;
219 #ifdef WITH_HPM_TOOLKIT 220 hpmStart(
id,(
char *)name.c_str());
222 #ifdef _PROFILER_WITH_PAPI_ 236 assert(!((
unsigned int)
id <= 0));
237 Event e((
unsigned int)
id);
241 #ifdef WITH_HPM_TOOLKIT 244 std::ostringstream Ostr;
245 Ostr <<
"Function" << id;
250 hpmStart(
id,(
char *)name.c_str());
252 #ifdef _PROFILER_WITH_PAPI_ 272 <<
"):" << name <<
", expected " 274 assert(
id == ei->id());
275 double inclusive = t - ei->timestamp();
276 double exclusive = inclusive - ei->exclusive();
278 ei->inclusive(inclusive);
279 ei->exclusive(exclusive);
280 #ifdef WITH_HPM_TOOLKIT 283 #ifdef _PROFILER_WITH_PAPI_ 292 ei->exclusive(t + inclusive);
293 #ifdef _PROFILER_WITH_PAPI_ 300 #ifdef WITH_HPM_TOOLKIT 304 assert((
unsigned int)
id == ei->id());
306 double inclusive = t - ei->timestamp();
307 double exclusive = inclusive - ei->exclusive();
309 ei->inclusive(inclusive);
310 ei->exclusive(exclusive);
311 #ifdef _PROFILER_WITH_PAPI_ 320 ei->exclusive(t + inclusive);
321 #ifdef _PROFILER_WITH_PAPI_ 332 unsigned int id = ei->id();
333 #ifdef WITH_HPM_TOOLKIT 337 double inclusive = t - ei->timestamp();
338 double exclusive = inclusive - ei->exclusive();
340 ei->inclusive(inclusive);
341 ei->exclusive(exclusive);
342 #ifdef _PROFILER_WITH_PAPI_ 352 ei->exclusive(t + inclusive);
356 #ifdef _PROFILER_WITH_PAPI_ 368 *
Err <<
"ProfilerObj::Error: Config file name empty." 372 std::ifstream conf_file;
375 conf_file.open(cfname.c_str());
378 *
Err <<
"ProfilerObj::Error: Could not open config file, " 379 << cfname <<
"." << std::endl;
382 std::string configline;
383 while(std::getline(conf_file,configline)){
384 std::istringstream Istr(configline);
386 std::getline(Istr,routine);
395 std::string application_name =
"Application";
396 std::map<unsigned int,cumulative_stats> statmap;
397 std::list<Event>::iterator ei =
event_list.begin();
400 std::map<unsigned int,cumulative_stats>::iterator si;
401 si = statmap.find(ei->id());
402 if(si == statmap.end()){
404 cs.
incl = ei->inclusive();
405 cs.
excl = ei->exclusive();
409 statmap.insert(std::make_pair(ei->id(),cs));
412 si->second.incl += ei->inclusive();
413 si->second.excl += ei->exclusive();
415 si->second.incl_dev += (ei->inclusive() * ei->inclusive());
416 si->second.excl_dev += (ei->exclusive() * ei->exclusive());
420 std::map<unsigned int,cumulative_stats>::iterator si = statmap.begin();
421 std::map<unsigned int,std::string>::iterator cmi =
configmap.find(0);
423 application_name = cmi->second;
425 Ostr <<
"#Statistics for " << application_name <<
":" << std::endl
426 <<
"#Total Execution Time: " << ei->inclusive() << std::endl
427 <<
"#------------------------------------------" 428 <<
"Breakdown by Routine" 429 <<
"------------------------------------------" << std::endl
430 <<
"#Routine Name Inclusive Exclusive" 432 <<
" I-Std E-Mean E-Std " << std::endl
433 <<
"#------------------------------- ------------ ------------" 434 <<
" ----- ------------" 435 <<
" ------------ ------------ ------------" << std::endl;
436 while(si != statmap.end()){
437 std::string routine_name =
"Unknown";
439 Ostr << std::setiosflags(std::ios::left);
441 routine_name = cmi->second;
443 std::ostringstream Ostr;
444 Ostr << routine_name <<
" (" << si->first <<
")";
445 routine_name = Ostr.str();
447 double imean = si->second.incl/(double)si->second.ncalls;
448 double emean = si->second.excl/(
double)si->second.ncalls;
449 double imean2 = si->second.incl_dev/(double)si->second.ncalls;
450 double emean2 = si->second.excl_dev/(
double)si->second.ncalls;
451 Ostr << std::setw(32) << routine_name <<
" ";
452 Ostr << std::setprecision(5)
453 << std::setw(12) << si->second.incl <<
" " 454 << std::setw(12) << si->second.excl <<
" " 455 << std::setw(12) << si->second.ncalls <<
" " 456 << std::setw(12) << imean <<
" " 458 << (si->second.ncalls == 1 ? 0 :
459 sqrt(std::fabs(imean2-(imean*imean))))
461 << std::setw(12) << emean <<
" " 463 << (si->second.ncalls == 1 ? 0 :
464 sqrt(std::fabs(emean2-(emean*emean))))
477 std::ofstream eventfile;
478 std::ostringstream Ostr;
489 eventfile.open(Ostr.str().c_str());
500 ei->inclusive(t - ei->timestamp());
501 ei->exclusive(ei->inclusive()-ei->exclusive());
503 #ifdef _PROFILER_WITH_PAPI_ 514 std::ofstream configfile;
515 std::ostringstream Bfn;
517 configfile.open(Bfn.str().c_str());
520 configfile << fmi->second <<
" " << fmi->first << std::endl;
530 #ifdef WITH_HPM_TOOLKIT 533 #ifdef _PROFILER_WITH_PAPI_ 541 std::ifstream datafile;
542 datafile.open(filename.c_str());
545 *
Err <<
"ProfilerObj::ReadEventsFromFile: Error: Could not" 546 <<
" open datafile, " << filename <<
"." << std::endl;
563 *
Err <<
"ProfilerObj::ReadParallelEventFiles:Error: No input files." 568 std::vector<std::string>::const_iterator ifi = ifiles.begin();
569 while(ifi != ifiles.end()){
571 Inf.open(ifi->c_str());
574 *
Err <<
"ProfilerObj::ReadParallelEventFiles:Error: Unable to open" 575 <<
" event file, " << *ifi <<
"." << std::endl;
585 par_event_list.push_back(std::make_pair(profiler_rank,
event_list));
588 par_event_list.sort();
597 if(parallel_event_list.empty())
599 std::string application_name =
"Application";
600 std::map<unsigned int,std::string>::iterator cmi =
configmap.find(0);
602 application_name = cmi->second;
603 PEventList::reverse_iterator peri = parallel_event_list.rbegin();
605 unsigned int number_of_processors = peri->first + 1;
606 Ouf << number_of_processors << std::endl;
609 std::map<unsigned int,cumulative_stats> statmap;
610 PEventList::iterator peli = parallel_event_list.begin();
611 while(peli != parallel_event_list.end()){
613 std::list<Event>::const_iterator eli = peli->second.begin();
614 while(eli != peli->second.end()){
615 std::map<unsigned int,cumulative_stats>::iterator si;
616 si = statmap.find(eli->id());
617 if(si == statmap.end()){
619 cs.
incl = eli->inclusive();
620 cs.
excl = eli->exclusive();
624 statmap.insert(std::make_pair(eli->id(),cs));
627 if(eli->inclusive() < 0){
629 *
Err <<
"ProfilerObj::SummarizeParallelExecution:Error: " 630 <<
"Read existing negative inclusive value:" 631 << eli->inclusive() << std::endl;
634 if(eli->exclusive() < 0){
636 *
Err <<
"ProfilerObj::SummarizeParallelExecution:Error: " 637 <<
"Read existing negative exclusive value:" 638 << eli->inclusive() << std::endl;
641 si->second.incl += eli->inclusive();
642 si->second.excl += eli->exclusive();
644 si->second.incl_dev += (eli->inclusive() * eli->inclusive());
645 si->second.excl_dev += (eli->exclusive() * eli->exclusive());
649 parallel_cstat_list.push_back(std::make_pair(
profiler_rank,statmap));
656 PStatList::iterator psli = parallel_cstat_list.begin();
658 while(psli != parallel_cstat_list.end()){
661 std::map<unsigned int,cumulative_stats>::iterator si =
662 psli->second.begin();
663 while(si != psli->second.end()){
664 std::map<unsigned int,parallel_stats>::iterator psmi;
666 psmi = pstat_map.find(si->first);
668 if(psmi == pstat_map.end()){
673 ps.
incl_stdev = si->second.incl * si->second.incl;
679 ps.
excl_stdev = si->second.excl * si->second.excl;
685 ps.
call_stdev = si->second.ncalls * si->second.ncalls;
688 pstat_map.insert(std::make_pair(si->first,ps));
692 if(psmi->second.incl_min > si->second.incl){
693 psmi->second.incl_min = si->second.incl;
696 if(psmi->second.incl_max < si->second.incl){
697 psmi->second.incl_max = si->second.incl;
700 psmi->second.incl_mean += si->second.incl;
701 psmi->second.incl_stdev += si->second.incl * si->second.incl;
703 if(psmi->second.excl_min > si->second.excl){
704 psmi->second.excl_min = si->second.excl;
707 if(psmi->second.excl_max < si->second.excl){
708 psmi->second.excl_max = si->second.excl;
711 psmi->second.excl_mean += si->second.excl;
712 psmi->second.excl_stdev += si->second.excl * si->second.excl;
714 if(psmi->second.call_max < si->second.ncalls){
715 psmi->second.call_max = si->second.ncalls;
718 if(psmi->second.call_min > si->second.ncalls){
719 psmi->second.call_min = si->second.ncalls;
722 psmi->second.call_mean += si->second.ncalls;
723 psmi->second.call_stdev += si->second.ncalls * si->second.ncalls;
729 std::map<unsigned int,parallel_stats>::iterator si = pstat_map.begin();
730 Ostr <<
"#Statistics for " << application_name <<
" (" 731 << number_of_processors <<
" procs):" << std::endl << std::endl
732 <<
"#----------------------------------Inclusive Statistics" 733 <<
"------------------------------" << std::endl
736 <<
" Mean Inc " << std::endl
737 <<
"#Routine Name Duration Rank " 739 <<
" Duration Std Dev " << std::endl
740 <<
"#-------------------- ------------ ----- " 741 <<
"------------ -----" 742 <<
" ------------ ------------" << std::endl;
743 while(si != pstat_map.end()){
744 std::string routine_name =
"Unknown";
746 Ostr << std::setiosflags(std::ios::left);
748 routine_name = cmi->second;
750 std::ostringstream Ostr2;
751 Ostr2 << routine_name <<
" (" << si->first <<
")";
752 routine_name = Ostr2.str();
754 double imean = si->second.incl_mean/(double)number_of_processors;
755 double imean2 = (si->second.incl_min == si->second.incl_max ? 0 :
756 (sqrt(fabs((si->second.incl_stdev/
757 (
double)number_of_processors)
758 - (imean * imean)))));
759 Ostr << std::setw(32) << routine_name <<
" " 760 << std::setw(12) << si->second.incl_min <<
" " 761 << std::setw(5) << si->second.incl_minrank <<
" " 762 << std::setw(12) << si->second.incl_max <<
" " 763 << std::setw(5) << si->second.incl_maxrank <<
" " 764 << std::setw(12) << imean <<
" " 765 << std::setw(12) << imean2 <<
" " 767 Ouf << si->first <<
" " << si->second.incl_min <<
" " 768 << si->second.incl_minrank <<
" " 769 << si->second.incl_max <<
" " << si->second.incl_maxrank
770 <<
" " << imean <<
" " << imean2 << std::endl;
773 si = pstat_map.begin();
774 Ostr <<
"#----------------------------------Exclusive Statistics" 775 <<
"------------------------------" << std::endl
778 <<
" Mean Exc " << std::endl
779 <<
"#Routine Name Duration Rank " 781 <<
" Duration Std Dev " << std::endl
782 <<
"#-------------------- ------------ ----- " 783 <<
"------------ -----" 784 <<
" ------------ ------------" << std::endl;
785 while(si != pstat_map.end()){
786 std::string routine_name =
"Unknown";
788 Ostr << std::setiosflags(std::ios::left);
790 routine_name = cmi->second;
792 std::ostringstream Ostr2;
793 Ostr2 << routine_name <<
" (" << si->first <<
")";
794 routine_name = Ostr2.str();
796 double emean = si->second.excl_mean/(double)number_of_processors;
797 double emean2 = (si->second.excl_max == si->second.excl_min ? 0 :
798 (sqrt(fabs((si->second.excl_stdev/
799 (
double)number_of_processors)
800 - (emean * emean)))));
801 Ostr << std::setw(32) << routine_name <<
" " 802 << std::setw(12) << si->second.excl_min <<
" " 803 << std::setw(5) << si->second.excl_minrank <<
" " 804 << std::setw(12) << si->second.excl_max <<
" " 805 << std::setw(5) << si->second.excl_maxrank <<
" " 806 << std::setw(12) << emean <<
" " 807 << std::setw(12) << emean2 <<
" " 809 Ouf << si->first <<
" " << si->second.excl_min <<
" " 810 << si->second.excl_minrank <<
" " 811 << si->second.excl_max <<
" " << si->second.excl_maxrank
812 <<
" " << emean <<
" " << emean2 << std::endl;
825 int number_of_runs = input_files.size();
826 if(number_of_runs < 1){
828 *
Err <<
"ProfilerObj::ReadSummaryFiles:Error: No input files." 832 std::vector<int> problem_sizes;
833 problem_sizes.resize(number_of_runs);
835 unsigned int runsize = 0;
836 std::vector<std::string>::const_iterator ifi = input_files.begin();
837 while(ifi != input_files.end()){
840 Inf.open(ifi->c_str());
843 *
Err <<
"ProfilerObj::ReadSummaryFiles:Error: Cannot open summary " 844 <<
"file, " << *ifi <<
"." << std::endl;
847 unsigned int id, minrank, maxrank;
848 double min, max, mean, stddev;
850 while(Inf >>
id >> min >> minrank >> max >> maxrank >> mean >> stddev){
854 PStatMap::iterator psi = pstats.find(
id);
855 if(psi == pstats.end()){
863 pstats.insert(std::make_pair(
id,pso));
869 psi->second.excl_min = min;
870 psi->second.excl_minrank = minrank;
871 psi->second.excl_max = max;
872 psi->second.excl_maxrank = maxrank;
873 psi->second.excl_mean = mean;
874 psi->second.excl_stdev = stddev;
883 ScalaMap::iterator smi = scala_map.find(runsize);
884 if(smi != scala_map.end()){
886 *
Err <<
"ProfilerObj::ReadSummaryFiles:Error: Cannot process " 887 <<
"multiple runs of the same size." << std::endl;
890 problem_sizes[number_of_runs++] = runsize;
892 scala_map.insert(std::make_pair(runsize,pstats));
907 if(scala_map.empty() || (scala_map.size() == 1)){
909 *
Err <<
"ProfilerObj::Error: Invalid number of runs for scalability analysis." 920 ScalaMap::iterator scalamap_i = scala_map.begin();
922 while(scalamap_i != scala_map.end()){
923 int number_of_processors = scalamap_i->first;
933 PStatMap::iterator psm_i = scalamap_i->second.begin();
934 while(psm_i != scalamap_i->second.end()){
936 ScalaStatMap::iterator ssmi = scala_statmap.find(psm_i->first);
937 if(ssmi == scala_statmap.end()){
940 ss.
nprocs.push_back(number_of_processors);
941 unsigned int cpos = ss.
nprocs.size() - 1;
942 double probsize = (is_scaled ? number_of_processors : 1);
943 double probsize0 = (is_scaled ? ss.
nprocs[0] : 1);
949 ss.
nprocs[cpos])/probsize));
952 ss.
nprocs[cpos])/probsize));
955 ss.
nprocs[cpos])/probsize));
957 number_of_processors);
959 number_of_processors);
961 number_of_processors);
968 ss.
nprocs[cpos])/probsize));
971 ss.
nprocs[cpos])/probsize));
974 ss.
nprocs[cpos])/probsize));
976 number_of_processors);
978 number_of_processors);
980 number_of_processors);
982 scala_statmap.insert(std::make_pair(psm_i->first,ss));
988 ssmi->second.nprocs.push_back(number_of_processors);
989 unsigned int cpos = ssmi->second.nprocs.size() - 1;
990 double probsize = (is_scaled ? number_of_processors : 1);
991 double probsize0 = (is_scaled ? ssmi->second.nprocs[0] : 1);
992 ssmi->second.sstats[
IMIN_T].push_back(psm_i->second.incl_min);
993 ssmi->second.sstats[
IMAX_T].push_back(psm_i->second.incl_max);
994 ssmi->second.sstats[
IMEAN_T].push_back(psm_i->second.incl_mean);
995 ssmi->second.sstats[
IMIN_E].push_back
996 (((ssmi->second.sstats[
IMIN_T][0]*ssmi->second.nprocs[0])/probsize0)/
997 ((ssmi->second.sstats[
IMIN_T][cpos]*ssmi->second.nprocs[cpos])/
999 ssmi->second.sstats[
IMAX_E].push_back
1000 (((ssmi->second.sstats[
IMAX_T][0]*ssmi->second.nprocs[0])/probsize0)/
1001 ((ssmi->second.sstats[
IMAX_T][cpos]*ssmi->second.nprocs[cpos])/
1003 ssmi->second.sstats[
IMEAN_E].push_back
1004 (((ssmi->second.sstats[
IMEAN_T][0]*ssmi->second.nprocs[0])/
1005 probsize0)/((ssmi->second.sstats[
IMEAN_T][cpos]*
1006 ssmi->second.nprocs[cpos])/probsize));
1007 ssmi->second.sstats[
IMIN_S].push_back
1008 (ssmi->second.sstats[
IMIN_E][cpos]*number_of_processors);
1009 ssmi->second.sstats[
IMAX_S].push_back
1010 (ssmi->second.sstats[
IMAX_E][cpos]*number_of_processors);
1011 ssmi->second.sstats[
IMEAN_S].push_back
1012 (ssmi->second.sstats[
IMEAN_E][cpos]*number_of_processors);
1014 ssmi->second.sstats[
EMIN_T].push_back(psm_i->second.excl_min);
1015 ssmi->second.sstats[
EMAX_T].push_back(psm_i->second.excl_max);
1016 ssmi->second.sstats[
EMEAN_T].push_back(psm_i->second.excl_mean);
1017 ssmi->second.sstats[
EMIN_E].push_back
1018 (((ssmi->second.sstats[
EMIN_T][0]*ssmi->second.nprocs[0])/
1019 probsize0)/((ssmi->second.sstats[
EMIN_T][cpos]*
1020 ssmi->second.nprocs[cpos])/probsize));
1021 ssmi->second.sstats[
EMAX_E].push_back
1022 (((ssmi->second.sstats[
EMAX_T][0]*ssmi->second.nprocs[0])/
1023 probsize0)/((ssmi->second.sstats[
EMAX_T][cpos]*
1024 ssmi->second.nprocs[cpos])/probsize));
1025 ssmi->second.sstats[
EMEAN_E].push_back
1026 (((ssmi->second.sstats[
EMEAN_T][0]*ssmi->second.nprocs[0])/
1027 probsize0)/((ssmi->second.sstats[
EMEAN_T][cpos]*
1028 ssmi->second.nprocs[cpos])/probsize));
1029 ssmi->second.sstats[
EMIN_S].push_back
1030 (ssmi->second.sstats[
EMIN_E][cpos]*number_of_processors);
1031 ssmi->second.sstats[
EMAX_S].push_back
1032 (ssmi->second.sstats[
EMAX_E][cpos]*number_of_processors);
1033 ssmi->second.sstats[
EMEAN_S].push_back
1034 (ssmi->second.sstats[
EMEAN_E][cpos]*number_of_processors);
1045 std::string appname =
"Unknown";
1046 std::map<unsigned int,std::string>::iterator cfi =
configmap.find(0);
1048 appname.assign(cfi->second);
1050 ScalaStatMap::iterator ssm_i = scala_statmap.begin();
1051 Out <<
"############# Scalability Summary for " << appname <<
" " 1052 <<
"#############" << std::endl;
1053 while(ssm_i != scala_statmap.end()){
1054 std::string routine_name;
1055 std::map<unsigned int,std::string>::iterator cfi =
configmap.find(ssm_i->first);
1057 routine_name.assign(cfi->second);
1059 std::ostringstream Ostr;
1060 Ostr <<
"Unknown(" << ssm_i->first <<
")";
1061 routine_name.assign(Ostr.str());
1064 unsigned int nps = ss.
nprocs.size();
1067 Out <<
"# " << routine_name <<
":" << std::endl
1068 <<
"#-------------------------------------------------" 1069 <<
"------------------------------------------------------------" 1070 <<
"-----------------------------------------------" << std::endl
1071 <<
"# Inclusive Max Inclusive Mean " 1072 <<
" Inclusive Min Exclusive Max " 1073 <<
" Exclusive Mean" 1074 <<
" Exclusive Min" << std::endl
1075 <<
"# NProc Time(Eff)(Speedup) Time(Eff)(Speedup)" 1076 <<
" Time(Eff)(Speedup) Time(Eff)(Speedup) " 1077 <<
"Time(Eff)(Speedup) Time(Eff)(Speedup)" << std::endl
1078 <<
"#-------------------------------------------------" 1079 <<
"------------------------------------------------------------" 1080 <<
"-----------------------------------------------" << std::endl;
1081 for(
unsigned int a = 0;
a < nps;
a++){
1082 Out << std::resetiosflags(std::ios::floatfield)
1083 << std::setiosflags(std::ios::right) << std::setprecision(0)
1084 << std::setw(5) << ss.
nprocs[
a] <<
" " 1086 << std::setprecision(3) << std::showpoint << std::fixed
1087 << std::setw(9) << std::setiosflags(std::ios::right) << ss.
sstats[
IMAX_T][
a]
1088 <<
" " << std::setiosflags(std::ios::left) << std::setprecision(2)
1090 << std::setw(7) << std::setprecision(1) << ss.
sstats[
IMAX_S][
a] <<
" " 1092 << std::setprecision(3) << std::showpoint << std::fixed
1093 << std::setw(9) << std::setiosflags(std::ios::right) << ss.
sstats[
IMEAN_T][
a]
1094 <<
" " << std::setiosflags(std::ios::left) << std::setprecision(2) << std::setw(4)
1096 << std::setw(7) << std::setprecision(1) << ss.
sstats[
IMEAN_S][
a] <<
" " 1098 << std::setprecision(3) << std::showpoint << std::fixed
1099 << std::setw(9) << std::setiosflags(std::ios::right) << ss.
sstats[
IMIN_T][
a]
1100 <<
" " << std::setiosflags(std::ios::left) << std::setprecision(2) << std::setw(4)
1102 << std::setw(7) << std::setprecision(1) << ss.
sstats[
IMIN_S][
a] <<
" " 1104 << std::setprecision(3) << std::showpoint << std::fixed
1105 << std::setw(9) << std::setiosflags(std::ios::right) << ss.
sstats[
EMAX_T][
a]
1106 <<
" " << std::setiosflags(std::ios::left) << std::setprecision(2) << std::setw(4)
1108 << std::setw(7) << std::setprecision(1) << ss.
sstats[
EMAX_S][
a] <<
" " 1110 << std::setprecision(3) << std::showpoint << std::fixed
1111 << std::setw(9) << std::setiosflags(std::ios::right) << ss.
sstats[
EMEAN_T][
a]
1112 <<
" " << std::setiosflags(std::ios::left) << std::setprecision(2) << std::setw(4)
1114 << std::setw(7) << std::setprecision(1) << ss.
sstats[
EMEAN_S][
a] <<
" " 1116 << std::setprecision(3) << std::showpoint << std::fixed
1117 << std::setw(9) << std::setiosflags(std::ios::right) << ss.
sstats[
EMIN_T][
a]
1118 <<
" " << std::setiosflags(std::ios::left) << std::setprecision(2) << std::setw(4)
1120 << std::setw(7) << std::setprecision(1) << ss.
sstats[
EMIN_S][
a] << std::endl;
1122 Out <<
"#-------------------------------------------------" 1123 <<
"------------------------------------------------------------" 1124 <<
"-----------------------------------------------" << std::endl << std::endl
1131 #ifdef _PROFILER_WITH_PAPI_ 1134 PAPI_library_init(PAPI_VER_CURRENT);
1135 papiEventSet = PAPI_NULL;
1136 if(PAPI_create_eventset(&papiEventSet) != PAPI_OK){
1137 std::cerr <<
"PAPI Init Error: Could not create eventset."
double _inclusive
tree time
std::list< std::pair< unsigned int, StatMap > > PStatList
std::ostream * Out
stream for regular output
bool operator==(const struct timeval &t1, const struct timeval &t2)
faster timeval math
int FunctionExit(const std::string &name)
mark construct exit
bool _initd
whether the profiler has been initialized
int ReadEventsFromFile(const std::string &filename)
Read serial event file.
bool _finalized
whether the profiler has been finalized
bool operator!=(const struct timeval &t1, const struct timeval &t2)
faster timeval math
std::map< unsigned int, parallel_stats > PStatMap
std::ostream * Err
stream for errors
bool operator<(const timeval &t1, const timeval &t2)
faster timeval math
int ReadParallelEventFiles(const std::vector< std::string > &infiles, PEventList &par_event_list)
Read event files from parallel run.
double _timestamp
raw timestamp
void DumpContents(std::ostream &Ostr, const ContainerType &c, std::string del="\)
Dump container contents.
std::list< Event > event_list
completed events
int ScalabilitySummary(ScalaStatMap &scala_statmap, std::ostream &Out)
Scalability analysis output for multiple parallel runs.
unsigned int verblevel
verbosity level
int Finalize(bool writeFiles=true)
Shut down profiler.
Defines MPI-specific parallel global and program classes.
unsigned int call_minrank
FunctionMap function_map
map from construct name to unique id
struct timeval & operator-=(struct timeval &t1, const struct timeval &t2)
faster timeval math
unsigned int _id
unique identifier
unsigned int call_maxrank
unsigned int excl_maxrank
Performance Profiling interface definition.
unsigned int profiler_rank
parallel processor id
void SummarizeSerialExecution(std::ostream &Ostr)
Profiling output for serial application.
ConfigMap configmap
map from unique id to construct name
std::vector< double > sstats[18]
std::ostream & operator<<(std::ostream &ost, const Event &e)
EVENT's stream operator.
static double Time()
Simple timer.
int Dump(std::ostream &Ostr)
dumps closed events, clears memory
double _exclusive
self time
int PopulateScalaMap(ScalaMap &scala_map, ScalaStatMap &scala_statmap, bool is_scaled)
Build scalability stats for multiple parallel runs.
void WriteEventFile()
Writes final event file.
int FunctionExitAll()
Force all open profiling Events to close (emergency)
std::map< unsigned int, scalability_stats > ScalaStatMap
Marks construct entry/exit.
bool operator>(const struct timeval &t1, const struct timeval &t2)
faster timeval math
std::list< Event > open_event_list
construct entry events
void size_t int size_t int size_t int int int int double int int double double *void size_t int size_t int int int int int double int size_t size_t size_t double double *void size_t int size_t int size_t size_t int double int double double *void size_t size_t size_t double * a
std::istream & operator>>(std::istream &ist, Event &e)
EVENT's stream operator.
void DumpEvents(std::ostream &Ostr)
?Not sure?
bool operator>=(const struct timeval &t1, const struct timeval &t2)
faster timeval math
unsigned int incl_maxrank
unsigned int nfunc
total number of constructs profiled
std::vector< unsigned int > nprocs
unsigned int excl_minrank
std::list< std::pair< unsigned int, std::list< Event > > > PEventList
int SummarizeParallelExecution(std::ostream &Ostr, std::ostream &Ouf, PEventList &parallel_event_list)
Profiling output for single parallel run.
int FunctionEntry(const std::string &name)
mark construct entry
int ReadConfig(const std::string &fname)
Read configuration from file.
struct timeval & operator+=(struct timeval &t1, const struct timeval &t2)
faster timeval math
int Init(int id)
integer-only interface for init
std::map< unsigned int, PStatMap > ScalaMap
bool operator<=(const struct timeval &t1, const struct timeval &t2)
faster timeval math
std::map< unsigned int, cumulative_stats > StatMap
int ReadSummaryFiles(const std::vector< std::string > &input_files, ScalaMap &scala_map)
Read summary files from multiple parallel runs.
double time0
creation/init time
unsigned int incl_minrank