Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_reallocDualViewIfNeeded.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_REALLOCDUALVIEWIFNEEDED_HPP
11#define TPETRA_DETAILS_REALLOCDUALVIEWIFNEEDED_HPP
12
19
21#include "Kokkos_DualView.hpp"
22
23namespace Tpetra {
24namespace Details {
25
49template<class ValueType, class DeviceType>
50bool
51reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& dv,
52 const size_t newSize,
53 const char newLabel[],
54 const size_t tooBigFactor = 2,
55 const bool needFenceBeforeRealloc = true)
56{
57 typedef typename DeviceType::execution_space execution_space;
58 typedef Kokkos::DualView<ValueType*, DeviceType> dual_view_type;
59 typedef Kokkos::pair<size_t, size_t> range_type;
60
61 // Profiling this matters, because GPU allocations can be expensive.
63 ProfilingRegion region ("Tpetra::Details::reallocDualViewIfNeeded");
64
65 const size_t curSize = static_cast<size_t> (dv.extent (0));
66 if (curSize == newSize) {
67 return false; // did not reallocate
68 }
69 else if (curSize < newSize) { // too small; need to reallocate
70 if (needFenceBeforeRealloc) {
71 execution_space().fence (); // keep this fence to respect needFenceBeforeRealloc
72 }
73 dv = dual_view_type (); // free first, in order to save memory
74 // If current size is 0, the DualView's Views likely lack a label.
75 dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), newSize);
76 return true; // we did reallocate
77 }
78 else {
79 if (newSize == 0) { // special case: realloc to 0 means always do it
80 if (needFenceBeforeRealloc) {
81 execution_space().fence (); // keep this fence to respect needFenceBeforeRealloc
82 }
83 // If current size is 0, the DualView's Views likely lack a label.
84 dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), 0);
85 return true; // we did reallocate
86 }
87 // Instead of writing curSize >= tooBigFactor * newSize, express
88 // via division to avoid overflow (for very large right-hand side).
89 // We've already tested whether newSize == 0, so this is safe.
90 else if (curSize / newSize >= tooBigFactor) {
91 // The allocation is much too big, so free it and reallocate
92 // to the new, smaller size.
93 if (needFenceBeforeRealloc) {
94 execution_space().fence (); // keep this fence to respect needFenceBeforeRealloc
95 }
96 dv = dual_view_type (); // free first, in order to save memory
97 // If current size is 0, the DualView's Views likely lack a label.
98 dv = dual_view_type (curSize == 0 ? newLabel : dv.view_device().label (), newSize);
99 return true; // we did reallocate
100 }
101 else {
102 auto d_view = Kokkos::subview (dv.view_device(), range_type (0, newSize));
103 auto h_view = Kokkos::subview (dv.view_host(), range_type (0, newSize));
104 dv = Kokkos::DualView<ValueType*, DeviceType>(d_view, h_view);
105 return false; // we did not reallocate
106 }
107 }
108}
109
111template<class ValueType, class DeviceType>
112bool
113reallocDualViewIfNeeded (Kokkos::DualView<ValueType*, DeviceType>& exports,
114 const size_t newSize,
115 const std::string& newLabel,
116 const size_t tooBigFactor = 2,
117 const bool needFenceBeforeRealloc = true)
118{
119 return reallocDualViewIfNeeded<ValueType, DeviceType> (exports, newSize,
120 newLabel.c_str (),
121 tooBigFactor,
122 needFenceBeforeRealloc);
123}
124
125} // namespace Details
126} // namespace Tpetra
127
128#endif // TPETRA_DETAILS_REALLOCDUALVIEWIFNEEDED_HPP
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Nonmember function that computes a residual Computes R = B - A * X.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Namespace Tpetra contains the class and methods constituting the Tpetra library.