Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_ExecutionSpaces.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_EXECUTIONSPACES_HPP
11#define TPETRA_DETAILS_EXECUTIONSPACES_HPP
12
13#include <iostream>
14#include <sstream>
15#include <vector>
16
17#include <Kokkos_Core.hpp>
18
19#include <Teuchos_RCP.hpp>
20
23
39
40#define TPETRA_DETAILS_SPACES_THROW(x) \
41 { \
42 std::stringstream ss; \
43 ss << __FILE__ << ":" << __LINE__ << ": " << x; \
44 throw std::runtime_error(ss.str()); \
45 }
46
47namespace Tpetra {
48namespace Details {
49namespace Spaces {
50
56enum class Priority {
57 low = 0,
58 medium = 1,
59 high = 2,
60 NUM_LEVELS = 3 // not to be used as a priority
61};
62
63#if defined(KOKKOS_ENABLE_CUDA)
64inline void success_or_throw(cudaError_t err, const char *file,
65 const int line) {
66 if (err != cudaSuccess) {
67 std::stringstream ss;
68 ss << file << ":" << line << ": ";
69 ss << cudaGetErrorString(err);
70 throw std::runtime_error(ss.str());
71 }
72}
73#define TPETRA_DETAILS_SPACES_CUDA_RUNTIME(x) \
74 Tpetra::Details::Spaces::success_or_throw((x), __FILE__, __LINE__)
75#endif // KOKKOS_ENABLE_CUDA
76
83void lazy_init();
84
85#if defined(KOKKOS_ENABLE_CUDA)
86struct CudaInfo {
87 bool initialized_;
88 int lowPrio_;
89 int mediumPrio_; // same as CUDA default
90 int highPrio_;
91 cudaEvent_t execSpaceWaitEvent_; // see exec_space_wait
92
93 CudaInfo();
94 ~CudaInfo() = default; // execSpaceWaitEvent_ cleaned up by CUDA deinit
95 CudaInfo(const CudaInfo &other) = delete;
96 CudaInfo(CudaInfo &&other) = delete;
97};
98extern CudaInfo cudaInfo;
99#endif // KOKKOS_ENABLE_CUDA
100
101// Tpetra's managed spaces
102#if defined(KOKKOS_ENABLE_CUDA)
103template <typename Space>
104using IsCuda = std::enable_if_t<std::is_same_v<Space, Kokkos::Cuda>, bool>;
105template <typename Space>
106using NotCuda = std::enable_if_t<!std::is_same_v<Space, Kokkos::Cuda>, bool>;
107template <typename S1, typename S2>
108using BothCuda = std::enable_if_t<
109 std::is_same_v<S1, Kokkos::Cuda> && std::is_same_v<S2, Kokkos::Cuda>, bool>;
110template <typename S1, typename S2>
111using NotBothCuda = std::enable_if_t<!std::is_same_v<S1, Kokkos::Cuda> ||
112 !std::is_same_v<S2, Kokkos::Cuda>,
113 bool>;
114#endif // KOKKOS_ENABLE_CUDA
115
116#if defined(KOKKOS_ENABLE_SERIAL)
118template <typename Space>
119using IsSerial = std::enable_if_t<std::is_same_v<Space, Kokkos::Serial>, bool>;
120#endif // KOKKOS_ENABLE_SERIAL
121
122#if defined(KOKKOS_ENABLE_OPENMP)
124template <typename Space>
125using IsOpenMP = std::enable_if_t<std::is_same_v<Space, Kokkos::OpenMP>, bool>;
126#endif // KOKKOS_ENABLE_OPENMP
127
128#if defined(KOKKOS_ENABLE_HIP)
130template <typename Space>
131using IsHIP = std::enable_if_t<std::is_same_v<Space, Kokkos::HIP>, bool>;
132#endif // KOKKOS_ENABLE_HIP
133
134#if defined(KOKKOS_ENABLE_SYCL)
136template <typename Space>
137using IsSYCL = std::enable_if_t<std::is_same_v<Space, Kokkos::Experimental::SYCL>, bool>;
138#endif // KOKKOS_ENABLE_SYCL
139
145template <typename ExecSpace, Priority priority = Priority::medium
146#if defined(KOKKOS_ENABLE_CUDA)
147 ,
148 NotCuda<ExecSpace> = true
149#endif // KOKKOS_ENABLE_CUDA
150 >
151ExecSpace make_instance() {
152 return ExecSpace();
153}
154
161#if defined(KOKKOS_ENABLE_CUDA)
162template <typename ExecSpace, Priority priority = Priority::medium,
163 IsCuda<ExecSpace> = true>
164Kokkos::Cuda make_instance() {
165 lazy_init(); // CUDA priorities
166 cudaStream_t stream;
167 int prio;
168 switch (priority) {
169 case Priority::high:
170 prio = cudaInfo.highPrio_;
171 break;
172 case Priority::medium:
173 prio = cudaInfo.mediumPrio_;
174 break;
175 case Priority::low:
176 prio = cudaInfo.lowPrio_;
177 break;
178 default:
179 throw std::runtime_error("unexpected static Tpetra Space priority");
180 }
181 TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
182 cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, prio));
183 return Kokkos::Cuda(stream, true /*Kokkos will manage this stream*/);
184}
185#endif // KOKKOS_ENABLE_CUDA
186
192template <typename ExecSpace> ExecSpace make_instance(const Priority &prio) {
193 switch (prio) {
194 case Priority::high:
196 case Priority::medium:
198 case Priority::low:
200 default:
201 throw std::runtime_error("unexpected dynamic Tpetra Space priority");
202 }
203}
204
216template <typename ExecSpace> class InstanceLifetimeManager {
217public:
218 using execution_space = ExecSpace;
219 using rcp_type = Teuchos::RCP<const execution_space>;
220
227 template <Priority priority = Priority::medium>
228 rcp_type space_instance(int i = 0) {
230 "Tpetra::Details::Spaces::space_instance");
231
232 constexpr int p = static_cast<int>(priority);
233 static_assert(p < sizeof(instances) / sizeof(instances[0]),
234 "Spaces::Priority enum error");
235
236 if (i < 0) {
237 TPETRA_DETAILS_SPACES_THROW("requested instance id " << i << " (< 0)");
238 }
240 TPETRA_DETAILS_SPACES_THROW(
241 "requested instance id "
243 << ") set by TPETRA_SPACES_ID_WARN_LIMIT");
244 }
245
246 // make sure we can store an exec space at index i for priority
247 // not sure what happens in RCP(), so let's explicitly make it null
248 while (size_t(i) >= instances[p].size()) {
249 instances[p].push_back(Teuchos::ENull());
250 }
251
252 /* no exec space instance i of priority p exists.
253 It may have never existed, or all Tpetra objects referencing it have been
254 destructed.
255
256 Create a new RCP<ExecSpace> and internally store a weak
257 reference, so this space will be destructed when all strong references to
258 it are gone, but we can still refer to it as long as it lives to prevent
259 recreating
260 */
261 if (instances[p][i].is_null() || !instances[p][i].is_valid_ptr()) {
262
263 // create a strong RCP to a space
264 rcp_type r = Teuchos::RCP<const execution_space>(
265 new ExecSpace(make_instance<ExecSpace, priority>()));
266
267 // store a weak RCP to the space
268 instances[p][i] = r.create_weak();
269
270 return r; // allow strong rcp to escape so internal weak one does not
271 // immediately go away
272 }
273
274 auto r = instances[p][i].create_strong();
275 return r;
276 }
277
282 for (int i = 0; i < static_cast<int>(Spaces::Priority::NUM_LEVELS); ++i) {
283 for (const rcp_type &rcp : instances[i]) {
284 if (rcp.is_valid_ptr() && !rcp.is_null()) {
285 // avoid throwing in dtor
286 std::cerr << __FILE__ << ":" << __LINE__
287 << " execution space instance survived to "
288 "~InstanceLifetimeManager. strong_count() = "
289 << rcp.strong_count()
290 << ". Did a Tpetra object live past Kokkos::finalize()?"
291 << std::endl;
292 }
293 }
294 }
295 }
296
297private:
298 // one vector of instances for each priority level
299 std::vector<rcp_type>
300 instances[static_cast<int>(Spaces::Priority::NUM_LEVELS)];
301};
302
303#if defined(KOKKOS_ENABLE_CUDA)
304extern InstanceLifetimeManager<Kokkos::Cuda> cudaSpaces;
305#endif
306#if defined(KOKKOS_ENABLE_SERIAL)
307extern InstanceLifetimeManager<Kokkos::Serial> serialSpaces;
308#endif
309#if defined(KOKKOS_ENABLE_OPENMP)
310extern InstanceLifetimeManager<Kokkos::OpenMP> openMPSpaces;
311#endif
312#if defined(KOKKOS_ENABLE_HIP)
313extern InstanceLifetimeManager<Kokkos::HIP> HIPSpaces;
314#endif
315#if defined(KOKKOS_ENABLE_SYCL)
316extern InstanceLifetimeManager<Kokkos::Experimental::SYCL> SYCLSpaces;
317#endif
318
319#if defined(KOKKOS_ENABLE_CUDA)
320
324template <typename ExecSpace, Priority priority = Priority::medium,
325 IsCuda<ExecSpace> = true>
326Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
327 return cudaSpaces.space_instance<priority>(i);
328}
329#endif
330
331#if defined(KOKKOS_ENABLE_SERIAL)
335template <typename ExecSpace, Priority priority = Priority::medium,
336 IsSerial<ExecSpace> = true>
337Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
338 return serialSpaces.space_instance<priority>(i);
339}
340#endif
341
342#if defined(KOKKOS_ENABLE_OPENMP)
346template <typename ExecSpace, Priority priority = Priority::medium,
347 IsOpenMP<ExecSpace> = true>
348Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
349 return openMPSpaces.space_instance<priority>(i);
350}
351#endif
352
353#if defined(KOKKOS_ENABLE_HIP)
356template <typename ExecSpace, Priority priority = Priority::medium,
357 IsHIP<ExecSpace> = true>
358Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
359 return HIPSpaces.space_instance<priority>(i);
360}
361#endif
362#if defined(KOKKOS_ENABLE_SYCL)
366template <typename ExecSpace, Priority priority = Priority::medium,
367 IsSYCL<ExecSpace> = true>
368Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
369 return SYCLSpaces.space_instance<priority>(i);
370}
371#endif
372
378template <typename ExecSpace>
379Teuchos::RCP<const ExecSpace> space_instance(const Priority &priority,
380 int i = 0) {
381 switch (priority) {
382 case Priority::high:
384 case Priority::medium:
386 case Priority::low:
388 default:
389 throw std::runtime_error(
390 "unexpected dynamic Tpetra Space priority in space_instance");
391 }
392}
393
407template <typename S1, typename S2
408#if defined(KOKKOS_ENABLE_CUDA)
409 ,
410 NotBothCuda<S1, S2> = true
411#endif
412 >
413void exec_space_wait(const char *msg, const S1 &waitee, const S2 & /*waiter*/) {
415 "Tpetra::Details::Spaces::exec_space_wait");
416 lazy_init();
417 waitee.fence(msg);
418}
419
420#if defined(KOKKOS_ENABLE_CUDA)
421template <typename S1, typename S2, BothCuda<S1, S2> = true>
422void exec_space_wait(const char *msg, const S1 &waitee, const S2 &waiter) {
424 "Tpetra::Details::Spaces::exec_space_wait");
425 lazy_init();
426
427 // if they are the same instance, no sync needed
428 if (waitee.impl_instance_id() !=
429 waiter
430 .impl_instance_id()) { // TODO: use instance operator== once available
431 /* cudaStreamWaitEvent is not affected by later calls to cudaEventRecord,
432 even if it overwrites the state of a shared event this means we only need
433 one event even if many exec_space_waits are in flight at the same time
434 */
435 TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
436 cudaEventRecord(cudaInfo.execSpaceWaitEvent_, waitee.cuda_stream()));
437 TPETRA_DETAILS_SPACES_CUDA_RUNTIME(cudaStreamWaitEvent(
438 waiter.cuda_stream(), cudaInfo.execSpaceWaitEvent_, 0 /*flags*/));
439 }
440}
441#endif
442
443template <typename S1, typename S2>
444void exec_space_wait(const S1 &waitee, const S2 &waiter) {
446 "Tpetra::Details::Spaces::exec_space_wait");
447 lazy_init();
448 exec_space_wait("anonymous", waitee, waiter);
449}
450
451template <typename ExecutionSpace>
452constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space() {
453 return false;
454}
455
456#if defined(KOKKOS_ENABLE_CUDA)
457template <>
458constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space<Kokkos::Cuda>() {
459 return true;
460}
461#endif
462
463#if defined(KOKKOS_ENABLE_HIP)
464template <>
465constexpr KOKKOS_INLINE_FUNCTION bool
466is_gpu_exec_space<Kokkos::HIP>() {
467 return true;
468}
469#endif
470
471#if defined(KOKKOS_ENABLE_SYCL)
472template <>
473constexpr KOKKOS_INLINE_FUNCTION bool
474is_gpu_exec_space<Kokkos::Experimental::SYCL>() {
475 return true;
476}
477#endif
478
479} // namespace Spaces
480} // namespace Details
481} // namespace Tpetra
482
483#undef TPETRA_DETAILS_SPACES_THROW
484
485#endif // TPETRA_DETAILS_EXECUTIONSPACES_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void exec_space_wait(const char *msg, const S1 &waitee, const S2 &)
cause future work submitted to waiter to wait for the current work in waitee to finish
ExecSpace make_instance()
Construct a Kokkos execution space instance with the following priority.
Teuchos::RCP< const ExecSpace > space_instance(const Priority &priority, int i=0)
get a strong Teuchos::RCP to Tpetra-managed Kokkos execution space instance i
Priority
Priority interface for Tpetra's managed execution spaces.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
Provides reusable Kokkos execution space instances.
~InstanceLifetimeManager()
Issue a warning if any Tpetra-managed execution space instances survive to the end of static lifetime...
rcp_type space_instance(int i=0)
Retrieve a strong Teuchos::RCP<const ExecSpace> to instance i.
Nonmember function that computes a residual Computes R = B - A * X.
Namespace Tpetra contains the class and methods constituting the Tpetra library.