Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_KokkosTeuchosTimerInjection.cpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
#include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp"
#include "TpetraCore_config.h"
#include "Tpetra_Details_Behavior.hpp"
#include "Kokkos_Core.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_Time.hpp"
#include "Teuchos_RCP.hpp"
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
#include "Teuchos_StackedTimer.hpp"
#include <sstream>
#endif
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
22
24
25
26
27namespace {
28 // Get a useful label from the deviceId
29 // NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
30 std::string deviceIdToString(const uint32_t deviceId) {
31 using namespace Kokkos::Tools::Experimental;
32 std::string device_label("(");
33 ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
34 if (eid.type == DeviceType::Serial) device_label+="Serial";
35 else if (eid.type == DeviceType::OpenMP) device_label+="OpenMP";
36 else if (eid.type == DeviceType::Cuda) device_label+="Cuda";
37 else if (eid.type == DeviceType::HIP) device_label+="HIP";
38 else if (eid.type == DeviceType::OpenMPTarget) device_label+="OpenMPTarget";
39 else if (eid.type == DeviceType::HPX) device_label+="HPX";
40 else if (eid.type == DeviceType::Threads) device_label+="Threads";
41 else if (eid.type == DeviceType::SYCL) device_label+="SYCL";
42 else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC";
43 else if (eid.type == DeviceType::Unknown) device_label+="Unknown";
44 else device_label+="Unknown to Tpetra";
45#if KOKKOS_VERSION >= 40499
46 if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
47 device_label += " All Instances)";
48 else if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
49 device_label += " DeepCopyResource)";
50#else
51 if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
52 device_label += " All Instances)";
53 else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
54 device_label += " DeepCopyResource)";
55#endif
56 else
57 device_label += " Instance " + std::to_string(eid.instance_id) + ")";
58
59 return device_label;
60 }
61
// Print a banner on std::cout explaining that a timer was stopped while a nested
// subtimer was still running, which the StackedTimer does not allow.
//
// The text is streamed directly instead of through a std::ostringstream, so this
// function does not depend on <sstream>, which this file only includes when
// HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER is defined.
void overlappingWarning() {
  static const char* const banner =
    "\n*********************************************************************\n"
    "WARNING: Overlapping timers detected!\n"
    "A TimeMonitor timer was stopped before a nested subtimer was\n"
    "stopped. This is not allowed by the StackedTimer. This corner case\n"
    "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
    "assigned to a new timer. To disable this warning, either fix the\n"
    "ordering of timer creation and destruction or disable the StackedTimer\n";
  // std::endl is kept on purpose: the warning should be flushed immediately.
  std::cout << banner << std::endl;
}
74
75}// anonymous space
76
77
78namespace Tpetra {
79namespace Details {
80
81 namespace DeepCopyTimerInjection {
82 Teuchos::RCP<Teuchos::Time> timer_;
83 bool initialized_ = false;
84
85 void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
86 Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
87 uint64_t size) {
88 // In verbose mode, we add the src/dst names as well
89 std::string extra_label;
91 extra_label = std::string(" {") + src_name + "=>" + dst_name + "}";
93 extra_label = std::string(" {") + src_name + "=>" + dst_name + "," + std::to_string(size)+"}";
94 }
95
96 if(timer_ != Teuchos::null)
97 std::cout << "WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error"<<std::endl;
98
99 // If the src_name is "Scalar" or "(none)" then we're doing a "Fill" style copy from host to devices, which we want to record separately.
100 if(!strcmp(src_name,"Scalar") || !strcmp(src_name,"(none)"))
101 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy_scalar [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
102 // If the size is under 65 bytes, we're going to flag this as "small" to make it easier to watch the big stuff
103 else if(size <= 64)
104 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy_small [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
105 else
106 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
107 timer_->start();
108 timer_->incrementNumCalls();
109#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
110 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
111 if (nonnull(stackedTimer))
112 stackedTimer->start(timer_->name());
113#endif
114 }
115
116 void kokkosp_end_deep_copy() {
117 if (timer_ != Teuchos::null) {
118 timer_->stop();
119#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
120 try {
121 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
122 if (nonnull(stackedTimer))
123 stackedTimer->stop(timer_->name());
124 }
125 catch (std::runtime_error&) {
126 overlappingWarning();
127 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
128 }
129#endif
130 }
131 timer_ = Teuchos::null;
132 }
133
134 }// end DeepCopyTimerInjection
135
136 void AddKokkosDeepCopyToTimeMonitor(bool force) {
137 if (!DeepCopyTimerInjection::initialized_) {
140 Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
141 Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
142 DeepCopyTimerInjection::initialized_=true;
143 }
144 }
145 }
146
147
148 namespace FenceTimerInjection {
149 Teuchos::RCP<Teuchos::Time> timer_;
150 bool initialized_ = false;
151 uint64_t active_handle;
152
153 void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
154 uint64_t* handle) {
155
156 // Nested fences are not allowed
157 if(timer_ != Teuchos::null)
158 return;
159 active_handle = (active_handle+1) % 1024;
160 *handle = active_handle;
161
162 std::string device_label = deviceIdToString(deviceId);
163
164 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::fence ")+name + " " + device_label);
165 timer_->start();
166 timer_->incrementNumCalls();
167#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
168 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
169 if (nonnull(stackedTimer))
170 stackedTimer->start(timer_->name());
171#endif
172
173 }
174
175
176 void kokkosp_end_fence(const uint64_t handle) {
177 if(handle == active_handle) {
178 if (timer_ != Teuchos::null) {
179 timer_->stop();
180#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
181 try {
182 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
183 if (nonnull(stackedTimer))
184 stackedTimer->stop(timer_->name());
185 }
186 catch (std::runtime_error&) {
187 overlappingWarning();
188 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
189 }
190#endif
191 }
192 timer_ = Teuchos::null;
193 }
194 // Else: We've nested our fences, and we need to ignore the inner fences
195 }
196
197
198 }//end FenceTimerInjection
199
200 void AddKokkosFenceToTimeMonitor(bool force) {
201 if (!FenceTimerInjection::initialized_) {
203 Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
204 Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
205 FenceTimerInjection::initialized_=true;
206 }
207 }
208 }
209
210
211 namespace FunctionsTimerInjection {
212 Teuchos::RCP<Teuchos::Time> timer_;
213 bool initialized_ = false;
214
215 void kokkosp_begin_kernel(const char* kernelName, const char* kernelPrefix, const uint32_t devID,
216 uint64_t* kernelID) {
217 // Nested fences are not allowed
218 if(timer_ != Teuchos::null)
219 return;
220 std::string device_label = deviceIdToString(devID);
221
222 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::")+ kernelName + " " +kernelPrefix + " " + device_label);
223 timer_->start();
224 timer_->incrementNumCalls();
225#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
226 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
227 if (nonnull(stackedTimer))
228 stackedTimer->start(timer_->name());
229#endif
230
231 }
232
233 void kokkosp_begin_for(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
234 kokkosp_begin_kernel("parallel_for",kernelPrefix,devID,kernelID);
235 }
236
237 void kokkosp_begin_scan(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
238 kokkosp_begin_kernel("parallel_scan",kernelPrefix,devID,kernelID);
239 }
240
241 void kokkosp_begin_reduce(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
242 kokkosp_begin_kernel("parallel_reduce",kernelPrefix,devID,kernelID);
243 }
244
245 void kokkosp_end_kernel(const uint64_t handle) {
246 if (timer_ != Teuchos::null) {
247 timer_->stop();
248#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
249 try {
250 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
251 if (nonnull(stackedTimer))
252 stackedTimer->stop(timer_->name());
253 }
254 catch (std::runtime_error&) {
255 overlappingWarning();
256 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
257 }
258#endif
259 }
260
261 timer_ = Teuchos::null;
262 }
263 }//end FunctionsInjection
264
265 void AddKokkosFunctionsToTimeMonitor(bool force) {
266 if (!FunctionsTimerInjection::initialized_) {
268 Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
269 Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
270 Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
271
272 // The end-call is generic, even though the start-call is not.
273 Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
274 Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
275 Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
276 FunctionsTimerInjection::initialized_=true;
277 }
278 }
279 }
280
281
282
283} // namespace Details
284} // namespace Tpetra
285
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces,...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size....
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination....
Nonmember function that computes a residual. Computes R = B - A * X.
Namespace Tpetra contains the class and methods constituting the Tpetra library.