22#include "Teuchos_OrdinalTraits.hpp"
23#include "Teuchos_TestForException.hpp"
24#include "TpetraCore_config.h"
26#include "KokkosKernels_config.h"
63#if not(defined(WIN) && (_MSC_VER >= 1900))
71namespace BehaviorDetails {
// Names of the environment variables consulted by this file.
//
// RESERVED_PREFIX is the prefix that the environment scan in this file
// compares variable names against; the remaining constants are the exact
// variable names looked up with std::getenv.
constexpr std::string_view RESERVED_PREFIX = "TPETRA_";
constexpr std::string_view ASSUME_GPU_AWARE_MPI =
    "TPETRA_ASSUME_GPU_AWARE_MPI";
constexpr std::string_view CUDA_LAUNCH_BLOCKING = "CUDA_LAUNCH_BLOCKING";
constexpr std::string_view MM_TAFC_OptimizationCoreCount =
    "MM_TAFC_OptimizationCoreCount";
constexpr std::string_view VERBOSE_PRINT_COUNT_THRESHOLD =
    "TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD";
constexpr std::string_view ROW_IMBALANCE_THRESHOLD =
    "TPETRA_ROW_IMBALANCE_THRESHOLD";
constexpr std::string_view MULTIVECTOR_USE_MERGE_PATH =
    "TPETRA_MULTIVECTOR_USE_MERGE_PATH";
constexpr std::string_view VECTOR_DEVICE_THRESHOLD =
    "TPETRA_VECTOR_DEVICE_THRESHOLD";
constexpr std::string_view HIERARCHICAL_UNPACK_BATCH_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_BATCH_SIZE";
constexpr std::string_view HIERARCHICAL_UNPACK_TEAM_SIZE =
    "TPETRA_HIERARCHICAL_UNPACK_TEAM_SIZE";
constexpr std::string_view USE_TEUCHOS_TIMERS = "TPETRA_USE_TEUCHOS_TIMERS";
constexpr std::string_view USE_KOKKOS_PROFILING =
    "TPETRA_USE_KOKKOS_PROFILING";
constexpr std::string_view DEBUG = "TPETRA_DEBUG";
constexpr std::string_view VERBOSE = "TPETRA_VERBOSE";
constexpr std::string_view TIMING = "TPETRA_TIMING";
constexpr std::string_view HIERARCHICAL_UNPACK = "TPETRA_HIERARCHICAL_UNPACK";
constexpr std::string_view SKIP_COPY_AND_PERMUTE =
    "TPETRA_SKIP_COPY_AND_PERMUTE";
constexpr std::string_view FUSED_RESIDUAL = "TPETRA_FUSED_RESIDUAL";
constexpr std::string_view OVERLAP = "TPETRA_OVERLAP";
constexpr std::string_view SPACES_ID_WARN_LIMIT =
    "TPETRA_SPACES_ID_WARN_LIMIT";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY =
    "TPETRA_TIME_KOKKOS_DEEP_COPY";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE1 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE1";
constexpr std::string_view TIME_KOKKOS_DEEP_COPY_VERBOSE2 =
    "TPETRA_TIME_KOKKOS_DEEP_COPY_VERBOSE2";
constexpr std::string_view TIME_KOKKOS_FENCE = "TPETRA_TIME_KOKKOS_FENCE";
constexpr std::string_view TIME_KOKKOS_FUNCTIONS =
    "TPETRA_TIME_KOKKOS_FUNCTIONS";
/// Build a std::array of std::string_view from any number of arguments.
///
/// The array length is deduced from the number of arguments, so callers never
/// have to spell out (and keep in sync) an explicit element count.
///
/// \param elems Values convertible to std::string_view; each is forwarded
///   into the corresponding array slot.
/// \return std::array<std::string_view, sizeof...(Elems)> holding the
///   arguments in call order.
template <typename... Elems>
constexpr std::array<std::string_view, sizeof...(Elems)>
make_array(Elems &&... elems) {
  return {std::forward<Elems>(elems)...};
}
124constexpr const auto RECOGNIZED_VARS = make_array(
125 ASSUME_GPU_AWARE_MPI, CUDA_LAUNCH_BLOCKING, MM_TAFC_OptimizationCoreCount,
126 VERBOSE_PRINT_COUNT_THRESHOLD, ROW_IMBALANCE_THRESHOLD,
127 MULTIVECTOR_USE_MERGE_PATH, VECTOR_DEVICE_THRESHOLD,
128 HIERARCHICAL_UNPACK_BATCH_SIZE, HIERARCHICAL_UNPACK_TEAM_SIZE,
129 USE_TEUCHOS_TIMERS, USE_KOKKOS_PROFILING, DEBUG, VERBOSE, TIMING,
130 HIERARCHICAL_UNPACK, SKIP_COPY_AND_PERMUTE, FUSED_RESIDUAL, OVERLAP,
131 SPACES_ID_WARN_LIMIT, TIME_KOKKOS_DEEP_COPY, TIME_KOKKOS_DEEP_COPY_VERBOSE1,
132 TIME_KOKKOS_DEEP_COPY_VERBOSE2, TIME_KOKKOS_FENCE, TIME_KOKKOS_FUNCTIONS);
// Cache of parsed "named" boolean variables. The outer key is an environment
// variable name; the inner map holds a "DEFAULT" entry plus one entry per
// name listed in that variable's value.
std::map<std::string, std::map<std::string, bool> > namedVariableMap_;
// Programmatic kill switches: when set, the verbose / timing queries in this
// file short-circuit before looking at the environment.
bool verboseDisabled_ = false;
bool timingDisabled_ = false;
// Classification of an environment variable's textual value.
enum EnvironmentVariableState
{
  EnvironmentVariableIsSet_ON,   // value is one of the recognized "true" spellings
  EnvironmentVariableIsSet_OFF,  // value is one of the recognized "false" spellings
  EnvironmentVariableIsSet,      // value is set, but is not a boolean spelling
  EnvironmentVariableIsNotSet    // variable is absent from the environment
};
// Return a copy of s with every character upper-cased via std::toupper.
//
// The lambda takes unsigned char because passing a negative char value to
// std::toupper is undefined behavior.
std::string stringToUpper (std::string s)
{
  std::transform (s.begin (), s.end (), s.begin (),
                  [] (unsigned char c) {
                    return static_cast<char> (std::toupper (c));
                  });
  return s;
}
// Split s on the separator character and hand each non-empty token to f.
//
// Empty tokens (leading, trailing, or doubled separators) are skipped, so an
// empty input produces no calls at all.
//
// \param s   Text to split.
// \param f   Callback invoked once per non-empty token, in order.
// \param sep Separator character.
void
split(const std::string_view s,
      std::function<void(const std::string&)> f,
      const char sep=',')
{
  typedef std::string_view::size_type size_type;
  const size_type length = s.length();
  size_type last_pos = 0;
  // "length + 1" lets the loop run one final time for the token that ends at
  // the end of the string rather than at a separator.
  while (last_pos < length + 1)
  {
    size_type cur_pos = s.find(sep, last_pos);
    if (cur_pos == std::string_view::npos)
    {
      cur_pos = length;
    }
    if (cur_pos != last_pos) {
      f(std::string(s.substr(last_pos, cur_pos - last_pos)));
    }
    last_pos = cur_pos + 1;
  }
}
183 EnvironmentVariableState
184 environmentVariableState(
const std::string& environmentVariableValue)
186 std::string v = stringToUpper(environmentVariableValue);
187 if (v ==
"1" || v ==
"YES" || v ==
"TRUE" || v ==
"ON")
189 return EnvironmentVariableIsSet_ON;
190 else if (v ==
"0" || v ==
"NO" || v ==
"FALSE" || v ==
"OFF")
192 return EnvironmentVariableIsSet_OFF;
194 return EnvironmentVariableIsSet;
198 setEnvironmentVariableMap (
const char environmentVariableName[],
199 std::map<std::string,std::map<std::string, bool> >& valsMap,
200 const bool defaultValue)
208 valsMap[environmentVariableName] = map<string,bool>{{
"DEFAULT", defaultValue}};
210 const char* varVal = getenv (environmentVariableName);
211 if (varVal ==
nullptr) {
218 const string varStr(varVal);
219 vector<string> names;
220 split(varStr, [&](
const string& x){names.push_back(x);});
221 for (
auto const& name: names) {
222 auto state = environmentVariableState(name);
223 if (state == EnvironmentVariableIsSet_ON) {
226 valsMap[environmentVariableName][
"DEFAULT"] =
true;
228 else if (state == EnvironmentVariableIsSet_OFF) {
231 valsMap[environmentVariableName][
"DEFAULT"] =
false;
236 valsMap[environmentVariableName][name] =
true;
243 idempotentlyGetNamedEnvironmentVariableAsBool (
const char name[],
245 const char environmentVariableName[],
246 const bool defaultValue)
248 using BehaviorDetails::namedVariableMap_;
250 setEnvironmentVariableMap (environmentVariableName,
255 auto thisEnvironmentVariableMap = namedVariableMap_[environmentVariableName];
256 auto thisEnvironmentVariable = thisEnvironmentVariableMap.find(name);
257 if (thisEnvironmentVariable != thisEnvironmentVariableMap.end())
258 return thisEnvironmentVariable->second;
259 return thisEnvironmentVariableMap[
"DEFAULT"];
264T getEnvironmentVariable(
const std::string_view environmentVariableName,
265 const T defaultValue) {
266 const char prefix[] =
"Tpetra::Details::Behavior: ";
268 const char *varVal = std::getenv(environmentVariableName.data());
269 if (varVal ==
nullptr) {
272 std::stringstream ss(varVal);
276 TEUCHOS_TEST_FOR_EXCEPTION(!ss, std::out_of_range,
277 prefix <<
"Environment "
279 << environmentVariableName
283 <<
" that cannot be parsed as a "
284 <<
typeid(T).name() <<
".");
292bool getEnvironmentVariable<bool>(
293 const std::string_view environmentVariableName,
const bool defaultValue) {
294 const char *varVal = std::getenv(environmentVariableName.data());
295 bool retVal = defaultValue;
296 if (varVal !=
nullptr) {
297 auto state = environmentVariableState(std::string(varVal));
298 if (state == EnvironmentVariableIsSet_ON)
300 else if (state == EnvironmentVariableIsSet_OFF)
313getEnvironmentVariable<size_t>(
const std::string_view environmentVariableName,
314 const size_t defaultValue) {
315 const char prefix[] =
"Tpetra::Details::Behavior: ";
317 const char *varVal = std::getenv(environmentVariableName.data());
318 if (varVal ==
nullptr) {
321 long long val = std::stoll(stringToUpper(varVal));
322 if (val <
static_cast<long long>(0)) {
324 return std::numeric_limits<size_t>::max();
326 if (
sizeof(
long long) >
sizeof(
size_t)) {
330 constexpr long long maxSizeT =
331 static_cast<long long>(std::numeric_limits<size_t>::max());
332 TEUCHOS_TEST_FOR_EXCEPTION(
333 val > maxSizeT, std::out_of_range,
334 prefix <<
"Environment "
336 << environmentVariableName
339 << val <<
" larger than the largest size_t value " << maxSizeT
342 return static_cast<size_t>(val);
347T idempotentlyGetEnvironmentVariable(
348 T &value,
bool &initialized,
const std::string_view environmentVariableName,
349 const T defaultValue) {
351 value = getEnvironmentVariable<T>(environmentVariableName, defaultValue);
358 constexpr bool debugDefault () {
359#ifdef HAVE_TPETRA_DEBUG
366 constexpr bool verboseDefault () {
370 constexpr bool timingDefault () {
374 constexpr bool assumeMpiIsGPUAwareDefault () {
375#ifdef TPETRA_ASSUME_GPU_AWARE_MPI
382 constexpr bool cudaLaunchBlockingDefault () {
386 constexpr bool hierarchicalUnpackDefault () {
395 static bool once =
false;
398 const char prefix[] =
"Tpetra::Details::Behavior: ";
400#if defined(WIN) && (_MSC_VER >= 1900)
401 env = *__p__environ();
405 for (; *env; ++env) {
409 const std::string_view ev(*env);
414 [&](
const std::string &s) {
423 if (name.size() >= BehaviorDetails::RESERVED_PREFIX.size() &&
424 name.substr(0, BehaviorDetails::RESERVED_PREFIX.size()) ==
425 BehaviorDetails::RESERVED_PREFIX) {
426 const auto it = std::find(BehaviorDetails::RECOGNIZED_VARS.begin(),
427 BehaviorDetails::RECOGNIZED_VARS.end(), name);
428 TEUCHOS_TEST_FOR_EXCEPTION(
429 it == BehaviorDetails::RECOGNIZED_VARS.end(), std::out_of_range,
430 prefix <<
"Environment "
432 << name <<
"\" (prefixed with \""
433 << BehaviorDetails::RESERVED_PREFIX
434 <<
"\") is not a recognized Tpetra variable.");
443 constexpr bool defaultValue = debugDefault();
445 static bool value_ = defaultValue;
446 static bool initialized_ =
false;
447 return idempotentlyGetEnvironmentVariable(
448 value_, initialized_, BehaviorDetails::DEBUG, defaultValue);
452 if (BehaviorDetails::verboseDisabled_)
455 constexpr bool defaultValue = verboseDefault();
457 static bool value_ = defaultValue;
458 static bool initialized_ =
false;
459 return idempotentlyGetEnvironmentVariable(
460 value_, initialized_, BehaviorDetails::VERBOSE, defaultValue);
464 if (BehaviorDetails::timingDisabled_)
467 constexpr bool defaultValue = timingDefault();
469 static bool value_ = defaultValue;
470 static bool initialized_ =
false;
471 return idempotentlyGetEnvironmentVariable(
472 value_, initialized_, BehaviorDetails::TIMING, defaultValue);
476 constexpr bool defaultValue = assumeMpiIsGPUAwareDefault();
478 static bool value_ = defaultValue;
479 static bool initialized_ =
false;
480 return idempotentlyGetEnvironmentVariable(
481 value_, initialized_, BehaviorDetails::ASSUME_GPU_AWARE_MPI,
486 constexpr bool defaultValue = cudaLaunchBlockingDefault();
488 static bool value_ = defaultValue;
489 static bool initialized_ =
false;
490 return idempotentlyGetEnvironmentVariable(
491 value_, initialized_, BehaviorDetails::CUDA_LAUNCH_BLOCKING,
496 constexpr int _default = 3000;
497 static int value_ = _default;
498 static bool initialized_ =
false;
499 return idempotentlyGetEnvironmentVariable(
500 value_, initialized_, BehaviorDetails::MM_TAFC_OptimizationCoreCount,
505 constexpr size_t defaultValue(200);
507 static size_t value_ = defaultValue;
508 static bool initialized_ =
false;
509 return idempotentlyGetEnvironmentVariable(
510 value_, initialized_, BehaviorDetails::VERBOSE_PRINT_COUNT_THRESHOLD,
515 constexpr size_t defaultValue(256);
517 static size_t value_ = defaultValue;
518 static bool initialized_ =
false;
519 return idempotentlyGetEnvironmentVariable(
520 value_, initialized_, BehaviorDetails::ROW_IMBALANCE_THRESHOLD,
525 constexpr bool defaultValue =
false;
527 static bool value_ = defaultValue;
528 static bool initialized_ =
false;
529 return idempotentlyGetEnvironmentVariable(
530 value_, initialized_, BehaviorDetails::MULTIVECTOR_USE_MERGE_PATH,
535 constexpr size_t defaultValue(22000);
537 static size_t value_ = defaultValue;
538 static bool initialized_ =
false;
539 return idempotentlyGetEnvironmentVariable(
540 value_, initialized_, BehaviorDetails::VECTOR_DEVICE_THRESHOLD,
546#ifdef HAVE_TPETRA_INST_CUDA
547 constexpr size_t defaultValue(16);
549 constexpr size_t defaultValue(256);
552 static size_t value_ = defaultValue;
553 static bool initialized_ =
false;
554 return idempotentlyGetEnvironmentVariable(
555 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_BATCH_SIZE,
560#ifdef HAVE_TPETRA_INST_CUDA
561 const size_t defaultValue(16);
563 const size_t defaultValue(Teuchos::OrdinalTraits<size_t>::invalid());
566 static size_t value_ = defaultValue;
567 static bool initialized_ =
false;
568 return idempotentlyGetEnvironmentVariable(
569 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK_TEAM_SIZE,
574 constexpr bool defaultValue(
false);
576 static bool value_ = defaultValue;
577 static bool initialized_ =
false;
578 return idempotentlyGetEnvironmentVariable(
579 value_, initialized_, BehaviorDetails::USE_TEUCHOS_TIMERS, defaultValue);
583 constexpr bool defaultValue(
false);
585 static bool value_ = defaultValue;
586 static bool initialized_ =
false;
587 return idempotentlyGetEnvironmentVariable(
588 value_, initialized_, BehaviorDetails::USE_KOKKOS_PROFILING,
593 constexpr bool defaultValue =
false;
595 static bool initialized_ =
false;
596 return idempotentlyGetNamedEnvironmentVariableAsBool(
597 name, initialized_, BehaviorDetails::DEBUG.data(), defaultValue);
601 if (BehaviorDetails::verboseDisabled_)
604 constexpr bool defaultValue =
false;
606 static bool initialized_ =
false;
607 return idempotentlyGetNamedEnvironmentVariableAsBool(
608 name, initialized_, BehaviorDetails::VERBOSE.data(), defaultValue);
612 BehaviorDetails::verboseDisabled_ =
false;
616 BehaviorDetails::verboseDisabled_ =
true;
620 if (BehaviorDetails::timingDisabled_)
623 constexpr bool defaultValue =
false;
625 static bool initialized_ =
false;
626 return idempotentlyGetNamedEnvironmentVariableAsBool(
627 name, initialized_, BehaviorDetails::TIMING.data(), defaultValue);
635 constexpr bool defaultValue = hierarchicalUnpackDefault();
637 static bool value_ = defaultValue;
638 static bool initialized_ =
false;
639 return idempotentlyGetEnvironmentVariable(
640 value_, initialized_, BehaviorDetails::HIERARCHICAL_UNPACK, defaultValue);
644 constexpr bool defaultValue(
false);
646 static bool value_ = defaultValue;
647 static bool initialized_ =
false;
648 return idempotentlyGetEnvironmentVariable(
649 value_, initialized_, BehaviorDetails::SKIP_COPY_AND_PERMUTE,
654#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || \
655 defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || \
656 defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
657 constexpr bool defaultValue(
false);
659 constexpr bool defaultValue(
true);
662 static bool value_ = defaultValue;
663 static bool initialized_ =
false;
664 return idempotentlyGetEnvironmentVariable(
665 value_, initialized_, BehaviorDetails::FUSED_RESIDUAL, defaultValue);
669 constexpr bool defaultValue(
false);
671 static bool value_ = defaultValue;
672 static bool initialized_ =
false;
673 return idempotentlyGetEnvironmentVariable(
674 value_, initialized_, BehaviorDetails::OVERLAP, defaultValue);
678 constexpr size_t defaultValue(16);
680 static size_t value_ = defaultValue;
681 static bool initialized_ =
false;
682 return idempotentlyGetEnvironmentVariable(
683 value_, initialized_, BehaviorDetails::SPACES_ID_WARN_LIMIT,
688 constexpr bool defaultValue(
false);
690 static bool value_ = defaultValue;
691 static bool initialized_ =
false;
692 return idempotentlyGetEnvironmentVariable(
693 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY,
698 constexpr bool defaultValue(
false);
700 static bool value_ = defaultValue;
701 static bool initialized_ =
false;
702 return idempotentlyGetEnvironmentVariable(
703 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE1,
708 constexpr bool defaultValue(
false);
710 static bool value_ = defaultValue;
711 static bool initialized_ =
false;
712 return idempotentlyGetEnvironmentVariable(
713 value_, initialized_, BehaviorDetails::TIME_KOKKOS_DEEP_COPY_VERBOSE2,
718 constexpr bool defaultValue(
false);
720 static bool value_ = defaultValue;
721 static bool initialized_ =
false;
722 return idempotentlyGetEnvironmentVariable(
723 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FENCE, defaultValue);
727 constexpr bool defaultValue(
false);
729 static bool value_ = defaultValue;
730 static bool initialized_ =
false;
731 return idempotentlyGetEnvironmentVariable(
732 value_, initialized_, BehaviorDetails::TIME_KOKKOS_FUNCTIONS,
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
static bool timing()
Whether Tpetra is in timing mode.
static void enable_verbose_behavior()
Enable verbose mode, programmatically.
static void disable_timing()
Disable timing, programmatically.
static bool cudaLaunchBlocking()
Whether the CUDA_LAUNCH_BLOCKING environment variable has been set.
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool fusedResidual()
Fusing SpMV and update in residual instead of using 2 kernel launches. Fusing kernels implies that no...
static bool hierarchicalUnpack()
Unpack rows of a matrix using hierarchical unpacking.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static int TAFC_OptimizationCoreCount()
MPI process count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced ...
static bool overlapCommunicationAndComputation()
Overlap communication and computation.
static void enable_timing()
Enable timing, programmatically.
static bool profilingRegionUseTeuchosTimers()
Use Teuchos::Timer in Tpetra::ProfilingRegion.
static bool profilingRegionUseKokkosProfiling()
Use Kokkos::Profiling in Tpetra::ProfilingRegion.
static bool verbose()
Whether Tpetra is in verbose mode.
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool useMergePathMultiVector()
Whether to use the cuSPARSE merge path algorithm to perform sparse matrix-multivector products,...
static size_t multivectorKernelLocationThreshold()
The threshold for transitioning from device to host.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static void disable_verbose_behavior()
Disable verbose mode, programmatically.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
static bool skipCopyAndPermuteIfPossible()
Skip copyAndPermute if possible.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
Nonmember function that computes a residual. Computes R = B - A * X.
Namespace Tpetra contains the class and methods constituting the Tpetra library.