Teuchos - Trilinos Tools Package Version of the Day
Loading...
Searching...
No Matches
Teuchos_TimeMonitor.cpp
1// @HEADER
2// *****************************************************************************
3// Teuchos: Common Tools Package
4//
5// Copyright 2004 NTESS and the Teuchos contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
11#include "Teuchos_CommHelpers.hpp"
12#include "Teuchos_DefaultComm.hpp"
15#include "Teuchos_StandardParameterEntryValidators.hpp"
17#include "Teuchos_StackedTimer.hpp"
18
19#include <functional>
20#include <iomanip>
21#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
22#include <sstream>
23#endif
24
25namespace Teuchos {
78 template<class Ordinal, class ScalarType, class IndexType>
79 class MaxLoc :
80 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
81 public:
82 void
83 reduce (const Ordinal count,
84 const std::pair<ScalarType, IndexType> inBuffer[],
85 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
86 };
87
88 template<class Ordinal>
89 class MaxLoc<Ordinal, double, int> :
90 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
91 public:
92 void
93 reduce (const Ordinal count,
94 const std::pair<double, int> inBuffer[],
95 std::pair<double, int> inoutBuffer[]) const
96 {
97 for (Ordinal ind = 0; ind < count; ++ind) {
98 const std::pair<double, int>& in = inBuffer[ind];
99 std::pair<double, int>& inout = inoutBuffer[ind];
100
101 if (in.first > inout.first) {
102 inout.first = in.first;
103 inout.second = in.second;
104 } else if (in.first < inout.first) {
105 // Don't need to do anything; inout has the values.
106 } else { // equal, or at least one is NaN.
107 inout.first = in.first;
108 inout.second = std::min (in.second, inout.second);
109 }
110 }
111 }
112 };
113
140 template<class Ordinal, class ScalarType, class IndexType>
141 class MinLoc :
142 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
143 public:
144 void
145 reduce (const Ordinal count,
146 const std::pair<ScalarType, IndexType> inBuffer[],
147 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
148 };
149
150 template<class Ordinal>
151 class MinLoc<Ordinal, double, int> :
152 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
153 public:
154 void
155 reduce (const Ordinal count,
156 const std::pair<double, int> inBuffer[],
157 std::pair<double, int> inoutBuffer[]) const
158 {
159 for (Ordinal ind = 0; ind < count; ++ind) {
160 const std::pair<double, int>& in = inBuffer[ind];
161 std::pair<double, int>& inout = inoutBuffer[ind];
162
163 if (in.first < inout.first) {
164 inout.first = in.first;
165 inout.second = in.second;
166 } else if (in.first > inout.first) {
167 // Don't need to do anything; inout has the values.
168 } else { // equal, or at least one is NaN.
169 inout.first = in.first;
170 inout.second = std::min (in.second, inout.second);
171 }
172 }
173 }
174 };
175
179 template<class Ordinal, class ScalarType, class IndexType>
181 public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
182 public:
183 void
184 reduce (const Ordinal count,
185 const std::pair<ScalarType, IndexType> inBuffer[],
186 std::pair<ScalarType, IndexType> inoutBuffer[]) const;
187 };
188
189 template<class Ordinal>
190 class MinLocNonzero<Ordinal, double, int> :
191 public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
192 public:
193 void
194 reduce (const Ordinal count,
195 const std::pair<double, int> inBuffer[],
196 std::pair<double, int> inoutBuffer[]) const
197 {
198 for (Ordinal ind = 0; ind < count; ++ind) {
199 const std::pair<double, int>& in = inBuffer[ind];
200 std::pair<double, int>& inout = inoutBuffer[ind];
201
202 if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
203 inout.first = in.first;
204 inout.second = in.second;
205 } else if (in.first > inout.first) {
206 // Don't need to do anything; inout has the values.
207 } else { // equal, or at least one is NaN.
208 inout.first = in.first;
209 inout.second = std::min (in.second, inout.second);
210 }
211 }
212 }
213 };
214
// timer_map_t: std::map from timer label to a (double, int) pair.
215 // Typedef used internally by TimeMonitor::summarize() and its
216 // helper functions. The map is keyed on timer label (a string).
217 // Each value is a pair: (total number of seconds over all calls to
218 // that timer, total number of calls to that timer).
219 typedef std::map<std::string, std::pair<double, int> > timer_map_t;
220
// Definition of the static member TimeMonitor::stackedTimer_.  It is
// initialized to a newly allocated StackedTimer constructed with the
// name "Teuchos::StackedTimer".
221 // static initialization
222 Teuchos::RCP<Teuchos::StackedTimer> TimeMonitor::stackedTimer_ = Teuchos::rcp(new Teuchos::StackedTimer("Teuchos::StackedTimer"));
223
224 TimeMonitor::TimeMonitor (Time& timer, bool reset)
225 : PerformanceMonitorBase<Time>(timer, reset)
226 {
227 if (!isRecursiveCall()) {
228 counter().start(reset);
229#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
231 stackedTimer_->start(counter().name(),false);
232#endif
233 }
234 }
235
237 if (!isRecursiveCall()) {
238 counter().stop();
239#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
240 try {
242 stackedTimer_->stop(counter().name(),false);
243 }
244 catch (std::runtime_error& e) {
245 std::ostringstream warning;
246 warning <<
247 "\n*********************************************************************\n"
248 "WARNING: Overlapping timers detected! Near: " <<counter().name()<<"\n"
249 "A TimeMonitor timer was stopped before a nested subtimer was\n"
250 "stopped. This is not allowed by the StackedTimer. This corner case\n"
251 "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
252 "assigned to a new timer. To disable this warning, either fix the\n"
253 "ordering of timer creation and destuction or disable the StackedTimer\n"
254 "support in the TimeMonitor by setting the StackedTimer to null\n"
255 "Example:\n"
256 " RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"Junk\"))));\n"
257 "///code to time \n"
258 "MM = Teuchos::null;\n"
259 "MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"SecondJunk\"))));\n"
260 "*********************************************************************\n";
261 std::cout << warning.str() << std::endl << e.what() << std::endl;
263 }
264#endif
265 }
266 }
267
268 void
269 TimeMonitor::disableTimer (const std::string& name)
270 {
271 RCP<Time> timer = lookupCounter (name);
273 timer == null, std::invalid_argument,
274 "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
275 timer->disable ();
276 }
277
278 void
279 TimeMonitor::enableTimer (const std::string& name)
280 {
281 RCP<Time> timer = lookupCounter (name);
283 timer == null, std::invalid_argument,
284 "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
285 timer->enable ();
286 }
287
288 void
290 {
291 typedef std::map<std::string, RCP<Time> > map_type;
292 typedef map_type::iterator iter_type;
293 map_type& ctrs = counters ();
294
295 // In debug mode, loop first to check whether any of the timers
296 // are running, before resetting them. This ensures that this
297 // method satisfies the strong exception guarantee (either it
298 // completes normally, or there are no side effects).
299#ifdef TEUCHOS_DEBUG
300 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
301 // We throw a runtime_error rather than a logic_error, because
302 // logic_error suggests a bug in the implementation of
303 // TimeMonitor. Calling zeroOutTimers() when a timer is running
304 // is not TimeMonitor's fault.
306 it->second->isRunning (), std::runtime_error,
307 "Timer \"" << it->second->name () << "\" is currently running. "
308 "You are not allowed to reset running timers.");
309 }
310#endif // TEUCHOS_DEBUG
311
312 for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
313 it->second->reset ();
314 }
315 }
316
317 // An anonymous namespace is the standard way of limiting linkage of
318 // its contained routines to file scope.
319 namespace {
// \brief Build the map entry for a timer that has recorded nothing.
//
// The returned entry pairs \c name with an elapsed time of zero and
// a call count of zero.  No timer object is created.
//
// \param name The timer's name.
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> zeroTimeAndCalls (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, zeroTimeAndCalls);
}
331
332 // \fn collectLocalTimerData
333 // \brief Collect and sort local timer data by timer names.
334 //
335 // \param localData [out] Map whose keys are the timer names, and
336 // whose value for each key is the total elapsed time (in
337 // seconds) and the call count for the timer with that name.
338 //
339 // \param localCounters [in] Timers from which to extract data.
340 //
341 // \param filter [in] Filter for timer labels. If filter is not
342 // empty, this method will only collect data for local timers
343 // whose labels begin with this string.
344 //
345 // Extract the total elapsed time and call count from each timer
346 // in the given array. Merge results for timers with duplicate
347 // labels, by summing their total elapsed times and call counts
348 // pairwise.
349 void
350 collectLocalTimerData (timer_map_t& localData,
351 const std::map<std::string, RCP<Time> >& localCounters,
352 const std::string& filter="")
353 {
354 using std::make_pair;
355 typedef timer_map_t::iterator iter_t;
356
357 timer_map_t theLocalData;
358 for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
359 it != localCounters.end(); ++it) {
360 const std::string& name = it->second->name ();
361
362 // Filter current timer name, if provided filter is nonempty.
363 // Filter string must _start_ the timer label, not just be in it.
364 const bool skipThisOne = (filter != "" && name.find (filter) != 0);
365 if (! skipThisOne) {
366 const double timing = it->second->totalElapsedTime ();
367 const int numCalls = it->second->numCalls ();
368
369 // Merge timers with duplicate labels, by summing their
370 // total elapsed times and call counts.
371 iter_t loc = theLocalData.find (name);
372 if (loc == theLocalData.end()) {
373 // Use loc as an insertion location hint.
374 theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
375 }
376 else {
377 loc->second.first += timing;
378 loc->second.second += numCalls;
379 }
380 }
381 }
382 // This avoids copying the map, and also makes this method
383 // satisfy the strong exception guarantee.
384 localData.swap (theLocalData);
385 }
386
387 // \brief Locally filter out timer data with zero call counts.
388 //
389 // \param timerData [in/out]
390 void
391 filterZeroData (timer_map_t& timerData)
392 {
393 // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
394 // iterator hint, instead of rebuilding the map completely.
395 timer_map_t newTimerData;
396 for (timer_map_t::const_iterator it = timerData.begin();
397 it != timerData.end(); ++it) {
398 if (it->second.second > 0) {
399 newTimerData[it->first] = it->second;
400 }
401 }
402 timerData.swap (newTimerData);
403 }
404
426 void
427 collectLocalTimerDataAndNames (timer_map_t& localTimerData,
428 Array<std::string>& localTimerNames,
429 const std::map<std::string, RCP<Time> >& localTimers,
430 const bool writeZeroTimers,
431 const std::string& filter="")
432 {
433 // Collect and sort local timer data by timer names.
434 collectLocalTimerData (localTimerData, localTimers, filter);
435
436 // Filter out zero data locally first. This ensures that if we
437 // are writing global stats, and if a timer name exists in the
438 // set of global names, then that timer has a nonzero call count
439 // on at least one MPI process.
440 if (! writeZeroTimers) {
441 filterZeroData (localTimerData);
442 }
443
444 // Extract the set of local timer names. The std::map keeps
445 // them sorted alphabetically.
446 localTimerNames.reserve (localTimerData.size());
447 for (timer_map_t::const_iterator it = localTimerData.begin();
448 it != localTimerData.end(); ++it) {
449 localTimerNames.push_back (it->first);
450 }
451 }
452
487 void
488 collectGlobalTimerData (timer_map_t& globalTimerData,
489 Array<std::string>& globalTimerNames,
490 timer_map_t& localTimerData,
491 Array<std::string>& localTimerNames,
492 Ptr<const Comm<int> > comm,
493 const bool alwaysWriteLocal,
494 const ECounterSetOp setOp)
495 {
496 // There may be some global timers that are not local timers on
497 // the calling MPI process(es). In that case, if
498 // alwaysWriteLocal is true, then we need to fill in the
499 // "missing" local timers. That will ensure that both global
500 // and local timer columns in the output table have the same
501 // number of rows. The collectLocalTimerDataAndNames() method
502 // may have already filtered out local timers with zero call
503 // counts (if its writeZeroTimers argument was false), but we
504 // won't be filtering again. Thus, any local timer data we
505 // insert here won't get filtered out.
506 //
507 // Note that calling summarize() with writeZeroTimers == false
508 // will still do what it says, even if we insert local timers
509 // with zero call counts here.
510
511 // This does the correct and inexpensive thing (just copies the
512 // timer data) if numProcs == 1. Otherwise, it initiates a
513 // communication with \f$O(\log P)\f$ messages along the
514 // critical path, where \f$P\f$ is the number of participating
515 // processes.
516 mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
517
518#ifdef TEUCHOS_DEBUG
519 {
520 // Sanity check that all processes have the name number of
521 // global timer names.
522 const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
523 timer_map_t::size_type minNumGlobalNames = 0;
524 timer_map_t::size_type maxNumGlobalNames = 0;
525 reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
526 outArg (minNumGlobalNames));
527 reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
528 outArg (maxNumGlobalNames));
529 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
530 std::logic_error, "Min # global timer names = " << minNumGlobalNames
531 << " != max # global timer names = " << maxNumGlobalNames
532 << ". Please report this bug to the Teuchos developers.");
533 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
534 std::logic_error, "My # global timer names = " << myNumGlobalNames
535 << " != min # global timer names = " << minNumGlobalNames
536 << ". Please report this bug to the Teuchos developers.");
537 }
538#endif // TEUCHOS_DEBUG
539
540 // mergeCounterNames() just merges the counters' names, not
541 // their actual data. Now we need to fill globalTimerData with
542 // this process' timer data for the timers in globalTimerNames.
543 //
544 // All processes need the full list of global timers, since
545 // there may be some global timers that are not local timers.
546 // That's why mergeCounterNames() has to be an all-reduce, not
547 // just a reduction to Proc 0.
548 //
549 // Insertion optimization: if the iterator given to map::insert
550 // points right before where we want to insert, insertion is
551 // O(1). globalTimerNames is sorted, so feeding the iterator
552 // output of map::insert into the next invocation's input should
553 // make the whole insertion O(N) where N is the number of
554 // entries in globalTimerNames.
555 timer_map_t::iterator globalMapIter = globalTimerData.begin();
556 timer_map_t::iterator localMapIter;
557 for (Array<string>::const_iterator it = globalTimerNames.begin();
558 it != globalTimerNames.end(); ++it) {
559 const std::string& globalName = *it;
560 localMapIter = localTimerData.find (globalName);
561
562 if (localMapIter == localTimerData.end()) {
563 if (alwaysWriteLocal) {
564 // If there are some global timers that are not local
565 // timers, and if we want to print local timers, we insert
566 // a local timer datum with zero elapsed time and zero
567 // call count into localTimerData as well. This will
568 // ensure that both global and local timer columns in the
569 // output table have the same number of rows.
570 //
571 // We really only need to do this on Proc 0, which is the
572 // only process that currently may print local timers.
573 // However, we do it on all processes, just in case
574 // someone later wants to modify this function to print
575 // out local timer data for some process other than Proc
576 // 0. This extra computation won't affect the cost along
577 // the critical path, for future computations in which
578 // Proc 0 participates.
579 localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
580
581 // Make sure the missing global name gets added to the
582 // list of local names. We'll re-sort the list of local
583 // names below.
584 localTimerNames.push_back (globalName);
585 }
586 // There's a global timer that's not a local timer. Add it
587 // to our pre-merge version of the global timer data so that
588 // we can safely merge the global timer data later.
589 globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
590 }
591 else {
592 // We have this global timer name in our local timer list.
593 // Fill in our pre-merge version of the global timer data
594 // with our local data.
595 globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
596 }
597 }
598
599 if (alwaysWriteLocal) {
600 // Re-sort the list of local timer names, since we may have
601 // inserted "missing" names above.
602 std::sort (localTimerNames.begin(), localTimerNames.end());
603 }
604
605#ifdef TEUCHOS_DEBUG
606 {
607 // Sanity check that all processes have the name number of
608 // global timers.
609 const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
610 timer_map_t::size_type minNumGlobalTimers = 0;
611 timer_map_t::size_type maxNumGlobalTimers = 0;
612 reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
613 outArg (minNumGlobalTimers));
614 reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
615 outArg (maxNumGlobalTimers));
616 TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
617 std::logic_error, "Min # global timers = " << minNumGlobalTimers
618 << " != max # global timers = " << maxNumGlobalTimers
619 << ". Please report this bug to the Teuchos developers.");
620 TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
621 std::logic_error, "My # global timers = " << myNumGlobalTimers
622 << " != min # global timers = " << minNumGlobalTimers
623 << ". Please report this bug to the Teuchos developers.");
624 }
625#endif // TEUCHOS_DEBUG
626 }
627
674 void
675 computeGlobalTimerStats (stat_map_type& statData,
676 std::vector<std::string>& statNames,
677 Ptr<const Comm<int> > comm,
678 const timer_map_t& globalTimerData,
679 const bool ignoreZeroTimers)
680 {
681 using Teuchos::ScalarTraits;
682
683 const int numTimers = static_cast<int> (globalTimerData.size());
684 const int numProcs = comm->getSize();
685
686 // Extract pre-reduction timings and call counts into a
687 // sequential array. This array will be in the same order as
688 // the global timer names are in the map.
689 Array<std::pair<double, int> > timingsAndCallCounts;
690 timingsAndCallCounts.reserve (numTimers);
691 for (timer_map_t::const_iterator it = globalTimerData.begin();
692 it != globalTimerData.end(); ++it) {
693 timingsAndCallCounts.push_back (it->second);
694 }
695
696 // For each timer name, compute the min timing and its
697 // corresponding call count. If two processes have the same
698 // timing but different call counts, the minimum call count will
699 // be used.
700 Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
701 if (numTimers > 0) {
702 if (ignoreZeroTimers)
703 reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
704 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
705 else
706 reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
707 &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
708 }
709
710 // For each timer name, compute the max timing and its
711 // corresponding call count. If two processes have the same
712 // timing but different call counts, the minimum call count will
713 // be used.
714 Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
715 if (numTimers > 0) {
716 reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
717 &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
718 }
719
720 // For each timer name, compute the mean-over-processes timing,
721 // the mean call count, and the mean-over-call-counts timing.
722 // The mean call count is reported as a double to allow a
723 // fractional value.
724 //
725 // Each local timing is really the total timing over all local
726 // invocations. The number of local invocations is the call
727 // count. Thus, the mean-over-call-counts timing is the sum of
728 // all the timings (over all processes), divided by the sum of
729 // all the call counts (over all processes). We compute it in a
730 // different way to over unnecessary overflow.
731 Array<double> meanOverCallCountsTimings (numTimers);
732 Array<double> meanOverProcsTimings (numTimers);
733 Array<double> meanCallCounts (numTimers);
734 Array<int> ICallThisTimer (numTimers);
735 Array<int> numProcsCallingEachTimer (numTimers);
736 {
737 // Figure out how many processors actually call each timer.
738 if (ignoreZeroTimers) {
739 for (int k = 0; k < numTimers; ++k) {
740 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
741 if (callCount > 0) ICallThisTimer[k] = 1;
742 else ICallThisTimer[k] = 0;
743 }
744 if (numTimers > 0) {
745 reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
746 &numProcsCallingEachTimer[0]);
747 }
748 }
749
750 // When summing, first scale by the number of processes. This
751 // avoids unnecessary overflow, and also gives us the mean
752 // call count automatically.
753 Array<double> scaledTimings (numTimers);
754 Array<double> scaledCallCounts (numTimers);
755 const double P = static_cast<double> (numProcs);
756
757 if (ignoreZeroTimers) {
758 for (int k = 0; k < numTimers; ++k) {
759 const double timing = timingsAndCallCounts[k].first;
760 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
761
762 scaledTimings[k] = timing / numProcsCallingEachTimer[k];
763 scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
764 }
765 }
766 else {
767 for (int k = 0; k < numTimers; ++k) {
768 const double timing = timingsAndCallCounts[k].first;
769 const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
770
771 scaledTimings[k] = timing / P;
772 scaledCallCounts[k] = callCount / P;
773 }
774 }
775
776 if (numTimers > 0) {
777 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
778 &meanOverProcsTimings[0]);
779 reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
780 &meanCallCounts[0]);
781 }
782 // We don't have to undo the scaling for the mean timings;
783 // just divide by the scaled call count.
784 for (int k = 0; k < numTimers; ++k) {
785 if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
786 meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
787 }
788 else {
789 meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
790 }
791 }
792 }
793
794 // Reformat the data into the map of statistics. Be sure that
795 // each value (the std::vector of (timing, call count) pairs,
796 // each entry of which is a different statistic) preserves the
797 // order of statNames.
798 statNames.resize (4);
799 statNames[0] = "MinOverProcs";
800 statNames[1] = "MeanOverProcs";
801 statNames[2] = "MaxOverProcs";
802 statNames[3] = "MeanOverCallCounts";
803
804 stat_map_type::iterator statIter = statData.end();
805 timer_map_t::const_iterator it = globalTimerData.begin();
806 for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
807 std::vector<std::pair<double, double> > curData (4);
808 curData[0] = minTimingsAndCallCounts[k];
809 curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
810 curData[2] = maxTimingsAndCallCounts[k];
811 curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
812
813 // statIter gives an insertion location hint that makes each
814 // insertion O(1), since we remember the location of the last
815 // insertion.
816 statIter = statData.insert (statIter, std::make_pair (it->first, curData));
817 }
818 }
819
820
// Helper that returns the communicator used when the caller does not
// supply one.
// NOTE(review): the return type line, the declaration of 'comm' and
// the statement inside the "MPI not started" branch are not visible
// in this listing -- confirm them against the original file.
838 getDefaultComm ()
839 {
840 // The default communicator. If Trilinos was built with MPI
841 // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
842 // been initialized, it's not valid to use the communicator!)
843 // Otherwise, this should be a "serial" (no MPI, one "process")
844 // communicator.
846
847#ifdef HAVE_MPI
848 {
// Ask MPI whether MPI_Init has been called; the flag stays 0 if not.
849 int mpiHasBeenStarted = 0;
850 MPI_Initialized (&mpiHasBeenStarted);
851 if (! mpiHasBeenStarted) {
852 // Make pComm a new "serial communicator."
854 }
855 }
856#endif // HAVE_MPI
857 return comm;
858 }
859
860 } // namespace (anonymous)
861
862
863 void
865 std::vector<std::string>& statNames,
866 Ptr<const Comm<int> > comm,
867 const ECounterSetOp setOp,
868 const std::string& filter)
869 {
870 // Collect local timer data and names. Filter out timers with
871 // zero call counts if writeZeroTimers is false. Also, apply the
872 // timer label filter at this point, so we don't have to compute
873 // statistics on timers we don't want to display anyway.
874 timer_map_t localTimerData;
875 Array<std::string> localTimerNames;
876 const bool writeZeroTimers = false;
877 collectLocalTimerDataAndNames (localTimerData, localTimerNames,
878 counters(), writeZeroTimers, filter);
879 // Merge the local timer data and names into global timer data and
880 // names.
881 timer_map_t globalTimerData;
882 Array<std::string> globalTimerNames;
883 const bool alwaysWriteLocal = false;
884 collectGlobalTimerData (globalTimerData, globalTimerNames,
885 localTimerData, localTimerNames,
886 comm, alwaysWriteLocal, setOp);
887 // Compute statistics on the data.
888 computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
889 }
890
891
892 void
894 std::ostream& out,
895 const bool alwaysWriteLocal,
896 const bool writeGlobalStats,
897 const bool writeZeroTimers,
898 const ECounterSetOp setOp,
899 const std::string& filter,
900 const bool ignoreZeroTimers)
901 {
902 //
903 // We can't just call computeGlobalTimerStatistics(), since
904 // summarize() has different options that affect whether global
905 // statistics are computed and printed.
906 //
907 const int numProcs = comm->getSize();
908 const int myRank = comm->getRank();
909
910 // Collect local timer data and names. Filter out timers with
911 // zero call counts if writeZeroTimers is false. Also, apply the
912 // timer label filter at this point, so we don't have to compute
913 // statistics on timers we don't want to display anyway.
914 timer_map_t localTimerData;
915 Array<std::string> localTimerNames;
916 collectLocalTimerDataAndNames (localTimerData, localTimerNames,
917 counters(), writeZeroTimers, filter);
918
919 // If we're computing global statistics, merge the local timer
920 // data and names into global timer data and names, and compute
921 // global timer statistics. Otherwise, leave the global data
922 // empty.
923 timer_map_t globalTimerData;
924 Array<std::string> globalTimerNames;
925 stat_map_type statData;
926 std::vector<std::string> statNames;
927 if (writeGlobalStats) {
928 collectGlobalTimerData (globalTimerData, globalTimerNames,
929 localTimerData, localTimerNames,
930 comm, alwaysWriteLocal, setOp);
931 // Compute statistics on the data, but only if the communicator
932 // contains more than one process. Otherwise, statistics don't
933 // make sense and we don't print them (see below).
934 if (numProcs > 1) {
935 computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
936 }
937 }
938
939 // Precision of floating-point numbers in the table.
940 const int precision = format().precision();
941 const std::ios_base::fmtflags& flags = out.flags();
942
943 // All columns of the table, in order.
944 Array<TableColumn> tableColumns;
945
946 // Labels of all the columns of the table.
947 // We will append to this when we add each column.
948 Array<std::string> titles;
949
950 // Widths (in number of characters) of each column.
951 // We will append to this when we add each column.
952 Array<int> columnWidths;
953
954 // Table column containing all timer names. If writeGlobalStats
955 // is true, we use the global timer names, otherwise we use the
956 // local timer names. We build the table on all processes
957 // redundantly, but only print on Rank 0.
958 {
959 titles.append ("Timer Name");
960
961 // The column labels depend on whether we are computing global statistics.
962 TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
963 tableColumns.append (nameCol);
964
965 // Each column is as wide as it needs to be to hold both its
966 // title and all of the column data. This column's title is the
967 // current last entry of the titles array.
968 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
969 }
970
971 // Table column containing local timer stats, if applicable. We
972 // only write local stats if asked, only on MPI Proc 0, and only
973 // if there is more than one MPI process in the communicator
974 // (otherwise local stats == global stats, so we just print the
975 // global stats). In this case, we've padded the local data on
976 // Proc 0 if necessary to match the global timer list, so that the
977 // columns have the same number of rows.
978 if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
979 titles.append ("Local time (num calls)");
980
981 // Copy local timer data out of the array-of-structs into
982 // separate arrays, for display in the table.
983 Array<double> localTimings;
984 Array<double> localNumCalls;
985 for (timer_map_t::const_iterator it = localTimerData.begin();
986 it != localTimerData.end(); ++it) {
987 localTimings.push_back (it->second.first);
988 localNumCalls.push_back (static_cast<double> (it->second.second));
989 }
990 TableColumn timeAndCalls (localTimings, localNumCalls, precision, flags, true);
991 tableColumns.append (timeAndCalls);
992 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
993 }
994
995 if (writeGlobalStats) {
996 // If there's only 1 process in the communicator, don't display
997 // statistics; statistics don't make sense in that case. Just
998 // display the timings and call counts. If there's more than 1
999 // process, do display statistics.
1000 if (numProcs == 1) {
1001 // Extract timings and the call counts from globalTimerData.
1002 Array<double> globalTimings;
1003 Array<double> globalNumCalls;
1004 for (timer_map_t::const_iterator it = globalTimerData.begin();
1005 it != globalTimerData.end(); ++it) {
1006 globalTimings.push_back (it->second.first);
1007 globalNumCalls.push_back (static_cast<double> (it->second.second));
1008 }
1009 // Print the table column.
1010 titles.append ("Global time (num calls)");
1011 TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, flags, true);
1012 tableColumns.append (timeAndCalls);
1013 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1014 }
1015 else { // numProcs > 1
1016 // Print a table column for each statistic. statNames and
1017 // each value in statData use the same ordering, so we can
1018 // iterate over valid indices of statNames to display the
1019 // statistics in the right order.
1020 const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1021 for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1022 // Extract lists of timings and their call counts for the
1023 // current statistic.
1024 Array<double> statTimings (numGlobalTimers);
1025 Array<double> statCallCounts (numGlobalTimers);
1026 stat_map_type::const_iterator it = statData.begin();
1027 for (int k = 0; it != statData.end(); ++it, ++k) {
1028 statTimings[k] = (it->second[statInd]).first;
1029 statCallCounts[k] = (it->second[statInd]).second;
1030 }
1031 // Print the table column.
1032 const std::string& statisticName = statNames[statInd];
1033 const std::string titleString = statisticName;
1034 titles.append (titleString);
1035 TableColumn timeAndCalls (statTimings, statCallCounts, precision, flags, true);
1036 tableColumns.append (timeAndCalls);
1037 columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1038 }
1039 }
1040 }
1041
1042 // Print the whole table to the given output stream on MPI Rank 0.
1043 format().setColumnWidths (columnWidths);
1044 if (myRank == 0) {
1045 std::ostringstream theTitle;
1046 theTitle << "TimeMonitor results over " << numProcs << " processor"
1047 << (numProcs > 1 ? "s" : "");
1048 format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1049 }
1050 }
1051
1052 void
1053 TimeMonitor::summarize (std::ostream &out,
1054 const bool alwaysWriteLocal,
1055 const bool writeGlobalStats,
1056 const bool writeZeroTimers,
1057 const ECounterSetOp setOp,
1058 const std::string& filter,
1059 const bool ignoreZeroTimers)
1060 {
1061 // The default communicator. If Trilinos was built with MPI
1062 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1063 // be a "serial" (no MPI, one "process") communicator.
1064 RCP<const Comm<int> > comm = getDefaultComm();
1065
1066 summarize (comm.ptr(), out, alwaysWriteLocal,
1067 writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1068 }
1069
1070 void
1072 std::vector<std::string>& statNames,
1073 const ECounterSetOp setOp,
1074 const std::string& filter)
1075 {
1076 // The default communicator. If Trilinos was built with MPI
1077 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1078 // be a "serial" (no MPI, one "process") communicator.
1079 RCP<const Comm<int> > comm = getDefaultComm();
1080
1081 computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1082 }
1083
1084 SyncTimeMonitor::SyncTimeMonitor(Time& timer, Ptr<const Comm<int> > comm, bool reset)
1085 : TimeMonitor(timer, reset), comm_(comm)
1086 { }
1087
1089 comm_->barrier();
1090 }
1091
1092
namespace {

  // Return a version of the given label that is safe to print as a
  // YAML key or scalar.
  //
  // - An empty label is returned as is (YAML permits empty keys; see
  //   Section 7.2 of the YAML 1.2 spec).
  // - A label that begins and ends with a double quote is treated as
  //   already quoted: its outer quotes are kept, and only the
  //   characters between them are escaped.
  // - Every double quote or backslash in the (inner) text gets a
  //   backslash in front of it.
  // - The result is wrapped in double quotes if the input was already
  //   quoted, or if the text contains a colon, a double quote, or a
  //   backslash.  Otherwise it is returned without quotes.
  std::string
  quoteLabelForYaml (const std::string& label)
  {
    const std::string::size_type len = label.size ();
    if (len == 0) {
      return label;
    }

    // Does the label arrive with its own pair of surrounding quotes?
    const bool wrapped =
      (len >= 2) && (label[0] == '"') && (label[len - 1] == '"');

    // Walk only the text between the surrounding quotes, if any.
    std::string::const_iterator cur = label.begin ();
    std::string::const_iterator stop = label.end ();
    if (wrapped) {
      ++cur;
      --stop;
    }

    // An already quoted label always gets its quotes put back.
    bool mustQuote = wrapped;

    std::string escaped;
    escaped.reserve (len);
    for (; cur != stop; ++cur) {
      switch (*cur) {
      case '"':
      case '\\':
        escaped += '\\'; // Escape the quote or backslash.
        mustQuote = true;
        break;
      case ':':
        mustQuote = true;
        break;
      default:
        break;
      }
      escaped += *cur;
    }

    if (! mustQuote) {
      return escaped;
    }
    return "\"" + escaped + "\"";
  }

} // namespace (anonymous)
1166
1167
1168 void TimeMonitor::
1169 summarizeToYaml (Ptr<const Comm<int> > comm,
1170 std::ostream &out,
1171 const ETimeMonitorYamlFormat yamlStyle,
1172 const std::string& filter)
1173 {
1177 using Teuchos::OSTab;
1178 using Teuchos::RCP;
1179 using Teuchos::rcpFromRef;
1180 using std::endl;
1181 typedef std::vector<std::string>::size_type size_type;
1182
1183 const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1184
1185 // const bool writeGlobalStats = true;
1186 // const bool writeZeroTimers = true;
1187 // const bool alwaysWriteLocal = false;
1188 const ECounterSetOp setOp = Intersection;
1189
1190 stat_map_type statData;
1191 std::vector<std::string> statNames;
1192 computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1193
1194 const int numProcs = comm->getSize();
1195
1196 // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1197 // " as the line prefix does not work, else I would prefer that
1198 // method for printing each line of a YAML block sequence (see
1199 // Section 8.2.1 of the YAML 1.2 spec).
1200 //
1201 // Also, I have to set the tab indent string here, rather than in
1202 // OSTab's constructor. This is because line prefix (which for
1203 // some reason is what OSTab's constructor takes, rather than tab
1204 // indent string) means something different from tab indent
1205 // string, and turning on the line prefix prints all sorts of
1206 // things including "|" for some reason.
1207 RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1208 pfout->setTabIndentStr (" ");
1209 FancyOStream& fout = *pfout;
1210
1211 fout << "# Teuchos::TimeMonitor report" << endl
1212 << "---" << endl;
1213
1214 // mfh 19 Aug 2012: An important goal of our chosen output format
1215 // was to minimize the nesting depth. We have managed to keep the
1216 // nesting depth to 3, which is the limit that the current version
1217 // of PylotDB imposes for its YAML input.
1218
1219 // Outermost level is a dictionary. (Individual entries of a
1220 // dictionary do _not_ begin with "- ".) We always print the
1221 // outermost level in standard style, not flow style, for better
1222 // readability. We begin the outermost level with metadata.
1223 fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1224 << "Number of processes: " << numProcs << endl
1225 << "Time unit: s" << endl;
1226 // For a key: value pair where the value is a sequence or
1227 // dictionary on the following line, YAML requires a space after
1228 // the colon.
1229 fout << "Statistics collected: ";
1230 // Print list of the names of all the statistics we collected.
1231 if (compact) {
1232 fout << " [";
1233 for (size_type i = 0; i < statNames.size (); ++i) {
1234 fout << quoteLabelForYaml (statNames[i]);
1235 if (i + 1 < statNames.size ()) {
1236 fout << ", ";
1237 }
1238 }
1239 fout << "]" << endl;
1240 }
1241 else {
1242 fout << endl;
1243 OSTab tab1 (pfout);
1244 for (size_type i = 0; i < statNames.size (); ++i) {
1245 fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1246 }
1247 }
1248
1249 // Print the list of timer names.
1250 //
1251 // It might be nicer instead to print a map from timer name to all
1252 // of its data, but keeping the maximum nesting depth small
1253 // ensures better compatibility with different parsing tools.
1254 fout << "Timer names: ";
1255 if (compact) {
1256 fout << " [";
1257 size_type ind = 0;
1258 for (stat_map_type::const_iterator it = statData.begin();
1259 it != statData.end(); ++it, ++ind) {
1260 fout << quoteLabelForYaml (it->first);
1261 if (ind + 1 < statData.size ()) {
1262 fout << ", ";
1263 }
1264 }
1265 fout << "]" << endl;
1266 }
1267 else {
1268 fout << endl;
1269 OSTab tab1 (pfout);
1270 for (stat_map_type::const_iterator it = statData.begin();
1271 it != statData.end(); ++it) {
1272 fout << "- " << quoteLabelForYaml (it->first) << endl;
1273 }
1274 }
1275
1276 // Print times for each timer, as a map from statistic name to its time.
1277 fout << "Total times: ";
1278 if (compact) {
1279 fout << " {";
1280 size_type outerInd = 0;
1281 for (stat_map_type::const_iterator outerIter = statData.begin();
1282 outerIter != statData.end(); ++outerIter, ++outerInd) {
1283 // Print timer name.
1284 fout << quoteLabelForYaml (outerIter->first) << ": ";
1285 // Print that timer's data.
1286 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1287 fout << "{";
1288 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1289 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1290 << curData[innerInd].first;
1291 if (innerInd + 1 < curData.size ()) {
1292 fout << ", ";
1293 }
1294 }
1295 fout << "}";
1296 if (outerInd + 1 < statData.size ()) {
1297 fout << ", ";
1298 }
1299 }
1300 fout << "}" << endl;
1301 }
1302 else {
1303 fout << endl;
1304 OSTab tab1 (pfout);
1305 size_type outerInd = 0;
1306 for (stat_map_type::const_iterator outerIter = statData.begin();
1307 outerIter != statData.end(); ++outerIter, ++outerInd) {
1308 // Print timer name.
1309 fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1310 // Print that timer's data.
1311 OSTab tab2 (pfout);
1312 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1313 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1314 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1315 << curData[innerInd].first << endl;
1316 }
1317 }
1318 }
1319
1320 // Print call counts for each timer, for each statistic name.
1321 fout << "Call counts:";
1322 if (compact) {
1323 fout << " {";
1324 size_type outerInd = 0;
1325 for (stat_map_type::const_iterator outerIter = statData.begin();
1326 outerIter != statData.end(); ++outerIter, ++outerInd) {
1327 // Print timer name.
1328 fout << quoteLabelForYaml (outerIter->first) << ": ";
1329 // Print that timer's data.
1330 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1331 fout << "{";
1332 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1333 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1334 << curData[innerInd].second;
1335 if (innerInd + 1 < curData.size ()) {
1336 fout << ", ";
1337 }
1338 }
1339 fout << "}";
1340 if (outerInd + 1 < statData.size ()) {
1341 fout << ", ";
1342 }
1343 }
1344 fout << "}" << endl;
1345 }
1346 else {
1347 fout << endl;
1348 OSTab tab1 (pfout);
1349 size_type outerInd = 0;
1350 for (stat_map_type::const_iterator outerIter = statData.begin();
1351 outerIter != statData.end(); ++outerIter, ++outerInd) {
1352 // Print timer name.
1353 fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1354 // Print that timer's data.
1355 OSTab tab2 (pfout);
1356 const std::vector<std::pair<double, double> >& curData = outerIter->second;
1357 for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1358 fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1359 << curData[innerInd].second << endl;
1360 }
1361 }
1362 }
1363 }
1364
1365 void TimeMonitor::
1366 summarizeToYaml (std::ostream &out,
1367 const ETimeMonitorYamlFormat yamlStyle,
1368 const std::string& filter)
1369 {
1370 // The default communicator. If Trilinos was built with MPI
1371 // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1372 // be a "serial" (no MPI, one "process") communicator.
1373 RCP<const Comm<int> > comm = getDefaultComm ();
1374
1375 summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1376 }
1377
1378 // Default value is false. We'll set to true once
1379 // setReportParameters() completes successfully.
1380 bool TimeMonitor::setParams_ = false;
1381
1382 // We have to declare all of these here in order to avoid linker errors.
1383 TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1384 TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1385 ECounterSetOp TimeMonitor::setOp_ = Intersection;
1386 bool TimeMonitor::alwaysWriteLocal_ = false;
1387 bool TimeMonitor::writeGlobalStats_ = true;
1388 bool TimeMonitor::writeZeroTimers_ = true;
1389
1390 void
1391 TimeMonitor::setReportFormatParameter (ParameterList& plist)
1392 {
1393 const std::string name ("Report format");
1394 const std::string defaultValue ("Table");
1395 const std::string docString ("Output format for report of timer statistics");
1396 Array<std::string> strings;
1397 Array<std::string> docs;
1398 Array<ETimeMonitorReportFormat> values;
1399
1400 strings.push_back ("YAML");
1401 docs.push_back ("YAML (see yaml.org) format");
1402 values.push_back (REPORT_FORMAT_YAML);
1403 strings.push_back ("Table");
1404 docs.push_back ("Tabular format via Teuchos::TableFormat");
1405 values.push_back (REPORT_FORMAT_TABLE);
1406
1408 docString,
1409 strings (), docs (),
1410 values (), &plist);
1411 }
1412
1413 void
1414 TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1415 {
1416 const std::string name ("YAML style");
1417 const std::string defaultValue ("spacious");
1418 const std::string docString ("YAML-specific output format");
1419 Array<std::string> strings;
1420 Array<std::string> docs;
1421 Array<ETimeMonitorYamlFormat> values;
1422
1423 strings.push_back ("compact");
1424 docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1425 "yaml.org) for most sequences except the outermost sequence");
1426 values.push_back (YAML_FORMAT_COMPACT);
1427
1428 strings.push_back ("spacious");
1429 docs.push_back ("Spacious format: avoid flow style");
1430 values.push_back (YAML_FORMAT_SPACIOUS);
1431
1433 docString,
1434 strings (), docs (),
1435 values (), &plist);
1436 }
1437
1438 void
1439 TimeMonitor::setSetOpParameter (ParameterList& plist)
1440 {
1441 const std::string name ("How to merge timer sets");
1442 const std::string defaultValue ("Intersection");
1443 const std::string docString ("How to merge differing sets of timers "
1444 "across processes");
1445 Array<std::string> strings;
1446 Array<std::string> docs;
1447 Array<ECounterSetOp> values;
1448
1449 strings.push_back ("Intersection");
1450 docs.push_back ("Compute intersection of timer sets over processes");
1451 values.push_back (Intersection);
1452 strings.push_back ("Union");
1453 docs.push_back ("Compute union of timer sets over processes");
1454 values.push_back (Union);
1455
1456 setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1457 strings (), docs (), values (),
1458 &plist);
1459 }
1460
1461 void
1466
1469 {
1470 return stackedTimer_;
1471 }
1472
1475 {
1476 // Our implementation favors recomputation over persistent
1477 // storage. That is, we simply recreate the list every time we
1478 // need it.
1479 RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1480
1481 const bool alwaysWriteLocal = false;
1482 const bool writeGlobalStats = true;
1483 const bool writeZeroTimers = true;
1484
1485 setReportFormatParameter (*plist);
1486 setYamlFormatParameter (*plist);
1487 setSetOpParameter (*plist);
1488 plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1489 "Always output local timers' values on Proc 0");
1490 plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1491 "statistics, even if there is only one process in the "
1492 "communicator");
1493 plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1494 "timers that have never been called");
1495
1497 }
1498
1499 void
1500 TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1501 {
1502 ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1503 ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1504 ECounterSetOp setOp = Intersection;
1505 bool alwaysWriteLocal = false;
1506 bool writeGlobalStats = true;
1507 bool writeZeroTimers = true;
1508
1509 if (params.is_null ()) {
1510 // If we've set parameters before, leave their current values.
1511 // Otherwise, set defaults (below).
1512 if (setParams_) {
1513 return;
1514 }
1515 }
1516 else { // params is nonnull. Let's read it!
1517 params->validateParametersAndSetDefaults (*getValidReportParameters ());
1518
1519 reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1520 yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1521 setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1522 alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1523 writeGlobalStats = params->get<bool> ("writeGlobalStats");
1524 writeZeroTimers = params->get<bool> ("writeZeroTimers");
1525 }
1526 // Defer setting state until here, to ensure the strong exception
1527 // guarantee for this method (either it throws with no externally
1528 // visible state changes, or it returns normally).
1529 reportFormat_ = reportFormat;
1530 yamlStyle_ = yamlStyle;
1531 setOp_ = setOp;
1532 alwaysWriteLocal_ = alwaysWriteLocal;
1533 writeGlobalStats_ = writeGlobalStats;
1534 writeZeroTimers_ = writeZeroTimers;
1535
1536 setParams_ = true; // Yay, we successfully set parameters!
1537 }
1538
1539 void
1541 std::ostream& out,
1542 const std::string& filter,
1543 const RCP<ParameterList>& params)
1544 {
1545 setReportParameters (params);
1546
1547 if (reportFormat_ == REPORT_FORMAT_YAML) {
1548 summarizeToYaml (comm, out, yamlStyle_, filter);
1549 }
1550 else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1551 summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1552 writeZeroTimers_, setOp_, filter);
1553 }
1554 else {
1555 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1556 "Invalid report format. This should never happen; ParameterList "
1557 "validation should have caught this. Please report this bug to the "
1558 "Teuchos developers.");
1559 }
1560 }
1561
1562 void
1564 std::ostream& out,
1565 const RCP<ParameterList>& params)
1566 {
1567 report (comm, out, "", params);
1568 }
1569
1570 void
1571 TimeMonitor::report (std::ostream& out,
1572 const std::string& filter,
1573 const RCP<ParameterList>& params)
1574 {
1575 RCP<const Comm<int> > comm = getDefaultComm ();
1576 report (comm.ptr (), out, filter, params);
1577 }
1578
1579 void
1580 TimeMonitor::report (std::ostream& out,
1581 const RCP<ParameterList>& params)
1582 {
1583 RCP<const Comm<int> > comm = getDefaultComm ();
1584 report (comm.ptr (), out, "", params);
1585 }
1586
1587} // namespace Teuchos
Defines basic traits for the scalar field type.
A column of TableEntry objects.
Provides utilities for formatting tabular output.
Scope guard for Teuchos::Time, with MPI collective timer reporting.
Replacement for std::vector that is compatible with the Teuchos Memory Management classes.
void reserve(size_type n)
void push_back(const value_type &x)
std::vector< T >::const_iterator const_iterator
The type of a const forward iterator.
Array< T > & append(const T &x)
Add a new entry at the end of the array.
Abstract interface for distributed-memory communication.
void reduceAll(const Comm< Ordinal > &comm, const ValueTypeReductionOp< Ordinal, Packet > &reductOp, const Ordinal count, const Packet sendBuffer[], Packet globalReducts[])
Wrapper for MPI_Allreduce that takes a custom reduction operator.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
Teuchos version of MPI_MAXLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
same as MinLoc, but don't allow zero
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
Teuchos version of MPI_MINLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
A list of parameters of arbitrary type.
IntegralType getIntegralValue(ParameterList const &paramList, std::string const &paramName)
Get an integral value for a parameter that is assumed to already be set.
RCP< ParameterList > parameterList()
Nonmember constructor.
void setStringToIntegralParameter(std::string const &paramName, std::string const &defaultValue, std::string const &docString, ArrayView< const std::string > const &strings, ParameterList *paramList)
Set up a std::string parameter that will use an embedded validator to allow the extraction of an inte...
PerformanceMonitorBase(Time &counter_in, bool reset=false)
static std::map< std::string, RCP< Time > > & counters()
static RCP< Time > lookupCounter(const std::string &name)
Simple wrapper class for raw pointers to single objects where no persisting relationship exists.
Ptr< T > outArg(T &arg)
create a non-persisting (required or optional) output argument for a function call.
Smart reference counting pointer class for automatic garbage collection.
RCP< T2 > rcp_implicit_cast(const RCP< T1 > &p1)
Implicit cast of underlying RCP type from T1* to T2*.
RCP< T2 > rcp_const_cast(const RCP< T1 > &p1)
Constant cast of underlying RCP type from T1* to T2*.
RCP< T > rcpFromRef(T &r)
Return a non-owning weak RCP object from a raw object reference for a defined type.
bool is_null() const
Returns true if the underlying pointer is null.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
T * get() const
Get the raw C++ pointer to the underlying object.
Concrete serial communicator subclass.
~SyncTimeMonitor() override
Destructor: stops the timer.
SyncTimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
int precision() const
Get the precision for writing doubles. Default is 4.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
~TimeMonitor() override
Destructor: stops the timer.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
static Teuchos::RCP< Teuchos::StackedTimer > getStackedTimer()
The StackedTimer used by the TimeMonitor.
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static void zeroOutTimers()
Reset all global timers to zero.
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer into which the TimeMonitor will insert timings.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
static Teuchos::RCP< Teuchos::StackedTimer > stackedTimer_
Stacked timer for optional injection of timing from TimeMonitor-enabled objects.
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
static void disableTimer(const std::string &name)
Disable the timer with the given name.
TimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
TimeMonitor(Time &timer, bool reset=false)
Constructor: starts the timer.
Wall-clock timer.
const std::string & name() const
The name of this timer.
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
double stop()
Stop the timer, if the timer is enabled (see disable()).
Base interface class for user-defined reduction operations for objects that use value semantics.
RCP< basic_FancyOStream< char > > fancyOStream(const RCP< std::basic_ostream< char > > &oStream, const std::basic_string< char > &tabIndentStr=" ", const int startingTab=0, const bool showLinePrefix=false, const int maxLenLinePrefix=10, const bool showTabCount=false, const bool showProcRank=false)
Dynamically allocate a FancyOStream and return it wrapped in an RCP object.
RCP< basic_FancyOStream< char > > getFancyOStream(const RCP< std::basic_ostream< char > > &out)
Get a FancyOStream from an std::ostream object.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
basic_FancyOStream< char > FancyOStream
basic_OSTab< char > OSTab
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
static T zero()
Returns representation of zero for this scalar type.