Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_copyConvert.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_COPYCONVERT_HPP
11#define TPETRA_DETAILS_COPYCONVERT_HPP
12
17
#include "TpetraCore_config.h"
#include "Kokkos_Core.hpp"
#include "Kokkos_ArithTraits.hpp"
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <type_traits>
24
25namespace Tpetra {
26namespace Details {
27
28//
29// Implementation details for copyConvert (see below).
30// Users should skip over this anonymous namespace.
31//
32namespace { // (anonymous)
33
34 // We need separate implementations for both (T,complex) and
35 // (complex,T), but we can't just overload for both cases, because
36 // that would be ambiguous (e.g., (complex,complex)).
37 template<class OutputValueType,
38 class InputValueType,
39 const bool outputIsComplex =
40 Kokkos::ArithTraits<OutputValueType>::is_complex,
41 const bool inputIsComplex =
42 Kokkos::ArithTraits<InputValueType>::is_complex>
43 struct ConvertValue
44 {
45 static KOKKOS_INLINE_FUNCTION void
46 convert (OutputValueType& dst, const InputValueType& src)
47 {
48 // This looks trivial, but it actually invokes OutputValueType's
49 // constructor, so that needs to be marked as a __host__
50 // __device__ function (e.g., via the KOKKOS_FUNCTION or
51 // KOKKOS_INLINE_FUNCTION macros).
52 dst = OutputValueType (src);
53 }
54 };
55
56 template<class OutputRealType, class InputComplexType>
57 struct ConvertValue<OutputRealType, InputComplexType, false, true>
58 {
59 static KOKKOS_INLINE_FUNCTION void
60 convert (OutputRealType& dst,
61 const InputComplexType& src)
62 {
63 // OutputRealType's constructor needs to be marked with either
64 // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
65 using KAI = Kokkos::ArithTraits<InputComplexType>;
66 dst = OutputRealType (KAI::real (src));
67 }
68 };
69
70 template<class OutputComplexType, class InputRealType>
71 struct ConvertValue<OutputComplexType, InputRealType, true, false>
72 {
73 static KOKKOS_INLINE_FUNCTION void
74 convert (OutputComplexType& dst,
75 const InputRealType& src)
76 {
77 // OutputComplexType's constructor needs to be marked with
78 // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
79 using output_mag_type =
80 typename Kokkos::ArithTraits<OutputComplexType>::mag_type;
81 using KAM = Kokkos::ArithTraits<output_mag_type>;
82 dst = OutputComplexType (src, KAM::zero ());
83 }
84 };
85
86 template<class OutputValueType,
87 class InputValueType>
88 KOKKOS_INLINE_FUNCTION void
89 convertValue (OutputValueType& dst, const InputValueType& src) {
90 ConvertValue<OutputValueType, InputValueType>::convert (dst, src);
91 }
92
// Primary template of the functor that the copyConvert implementation
// hands to Kokkos::parallel_for.  It is empty (it has no operator());
// the partial specializations below for rank 1 and rank 2 carry the
// actual implementations.  The rank parameter defaults to
// OutputViewType::rank, so callers name only the two View types.
97 template<class OutputViewType,
98 class InputViewType,
99 const int rank = static_cast<int> (OutputViewType::rank)>
100 class CopyConvertFunctor {};
101
102 template<class OutputViewType,
103 class InputViewType>
104 class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
105 private:
106 static_assert
107 (static_cast<int> (OutputViewType::rank) == 1 &&
108 static_cast<int> (InputViewType::rank) == 1,
109 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
110 "OutputViewType and InputViewType must both have rank 1.");
111 OutputViewType dst_;
112 InputViewType src_;
113
114 public:
115 using index_type = typename OutputViewType::size_type;
116
117 CopyConvertFunctor (const OutputViewType& dst,
118 const InputViewType& src) :
119 dst_ (dst),
120 src_ (src)
121 {}
122
123 KOKKOS_INLINE_FUNCTION void
124 operator () (const index_type i) const {
125 convertValue (dst_(i), src_(i));
126 }
127 };
128
129 template<class OutputViewType,
130 class InputViewType>
131 class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
132 public:
133 using index_type = typename OutputViewType::size_type;
134
135 private:
136 static_assert
137 (static_cast<int> (OutputViewType::rank) == 2 &&
138 static_cast<int> (InputViewType::rank) == 2,
139 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
140 "OutputViewType and InputViewType must both have rank 2.");
141 OutputViewType dst_;
142 InputViewType src_;
143 index_type numCols_;
144
145 public:
146 CopyConvertFunctor (const OutputViewType& dst,
147 const InputViewType& src) :
148 dst_ (dst),
149 src_ (src),
150 numCols_ (dst.extent (1))
151 {}
152
153 KOKKOS_INLINE_FUNCTION void
154 operator () (const index_type i) const {
155 const index_type numCols = numCols_;
156 for (index_type j = 0; j < numCols; ++j) {
157 convertValue (dst_(i,j), src_(i,j));
158 }
159 }
160 };
161
// Compile-time predicate used to select the Kokkos::deep_copy code
// path of CopyConvertImpl.  `value` is true exactly when the two View
// types have the same non-const value type and, in addition, share
// either their memory space or their array layout.
template<class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
private:
  using out_value_type  = typename OutputViewType::non_const_value_type;
  using in_value_type   = typename InputViewType::non_const_value_type;
  using out_memory_type = typename OutputViewType::memory_space;
  using in_memory_type  = typename InputViewType::memory_space;
  using out_layout_type = typename OutputViewType::array_layout;
  using in_layout_type  = typename InputViewType::array_layout;

  static constexpr bool valueTypesMatch =
    std::is_same<out_value_type, in_value_type>::value;
  static constexpr bool memorySpacesMatch =
    std::is_same<out_memory_type, in_memory_type>::value;
  static constexpr bool layoutsMatch =
    std::is_same<out_layout_type, in_layout_type>::value;

public:
  static constexpr bool value =
    valueTypesMatch && (memorySpacesMatch || layoutsMatch);
};
180
// Implementation of copyConvert, selected at compile time by two
// bool template parameters:
//
//   - canUseKokkosDeepCopy: defaults to
//     CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value.
//   - outputExecSpaceCanAccessInputMemSpace: defaults to the
//     `accessible` member of Kokkos::SpaceAccessibility instantiated
//     with the two Views' memory spaces.
//
// This primary template only declares run(); it does not define it.
// The partial specializations below supply the definitions for
// (true, any), (false, true), and (false, false).
//
// NOTE(review): the default for the second bool passes the output
// View's memory_space, not its execution_space, as the first
// SpaceAccessibility argument, although the parameter name talks
// about the execution space -- confirm that this is intended.
199 template<class OutputViewType,
200 class InputViewType,
201 const bool canUseKokkosDeepCopy =
202 CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
203 const bool outputExecSpaceCanAccessInputMemSpace =
204 Kokkos::SpaceAccessibility<
205 typename OutputViewType::memory_space,
206 typename InputViewType::memory_space>::accessible>
207 struct CopyConvertImpl {
208 static void
209 run (const OutputViewType& dst,
210 const InputViewType& src);
211 };
212
214 template<class OutputViewType,
215 class InputViewType,
216 const bool outputExecSpaceCanAccessInputMemSpace>
217 struct CopyConvertImpl<OutputViewType, InputViewType,
218 true, outputExecSpaceCanAccessInputMemSpace>
219 {
220 static void
221 run (const OutputViewType& dst,
222 const InputViewType& src)
223 {
224 // NOTE: It's important to do the addition _inside_ the
225 // reinterpret-cast. If you reinterpret_cast the separate
226 // results, you may get the wrong answer (e.g., because
227 // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
228 // virtual addresses). I'm speaking from experience here.
229 const ptrdiff_t dst_beg =reinterpret_cast<ptrdiff_t> (dst.data ());
230 const ptrdiff_t dst_end =
231 reinterpret_cast<ptrdiff_t> (dst.data () + dst.span ());
232 const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t> (src.data ());
233 const ptrdiff_t src_end =
234 reinterpret_cast<ptrdiff_t> (src.data () + src.span ());
235
236 if (dst_end > src_beg && src_end > dst_beg) {
237 // dst and src alias each other, so we can't call
238 // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
239 // and throws, at least in debug mode). Instead, we make
240 // temporary host storage (create_mirror always makes a new
241 // allocation, unlike create_mirror_view). Use host because
242 // it's cheaper to allocate. Hopefully users aren't doing
243 // aliased copies in a tight loop.
244 auto src_copy = Kokkos::create_mirror (Kokkos::HostSpace (), src);
245 // DEEP_COPY REVIEW - NOT TESTED
246 Kokkos::deep_copy (src_copy, src);
247 // DEEP_COPY REVIEW - NOT TESTED
248 Kokkos::deep_copy (dst, src_copy);
249 }
250 else { // no aliasing
251 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
252 using execution_space = typename OutputViewType::execution_space;
253 Kokkos::deep_copy (execution_space(), dst, src);
254 }
255 }
256 };
257
260 template<class OutputViewType,
261 class InputViewType>
262 struct CopyConvertImpl<OutputViewType,
263 InputViewType,
264 false,
265 true>
266 {
267 static void
268 run (const OutputViewType& dst,
269 const InputViewType& src)
270 {
271 using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
272 using execution_space = typename OutputViewType::execution_space;
273 using index_type = typename OutputViewType::size_type;
274 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
275 Kokkos::parallel_for ("Tpetra::Details::copyConvert",
276 range_type (0, dst.extent (0)),
277 functor_type (dst, src));
278 }
279 };
280
287 template<class OutputViewType,
288 class InputViewType>
289 struct CopyConvertImpl<OutputViewType, InputViewType, false, false>
290 {
291 static void
292 run (const OutputViewType& dst,
293 const InputViewType& src)
294 {
295 using output_memory_space = typename OutputViewType::memory_space;
296 using output_execution_space = typename OutputViewType::execution_space;
297 auto src_outputSpaceCopy =
298 Kokkos::create_mirror_view (output_memory_space (), src);
299 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
300 Kokkos::deep_copy (output_execution_space(), src_outputSpaceCopy, src);
301
302 // The output View's execution space can access
303 // outputSpaceCopy's data, so we can run the functor now.
304 using output_space_copy_type = decltype (src_outputSpaceCopy);
305 using functor_type =
306 CopyConvertFunctor<OutputViewType, output_space_copy_type>;
307 using execution_space = typename OutputViewType::execution_space;
308 using index_type = typename OutputViewType::size_type;
309 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
310 Kokkos::parallel_for ("Tpetra::Details::copyConvert",
311 range_type (0, dst.extent (0)),
312 functor_type (dst, src_outputSpaceCopy));
313 }
314 };
315} // namespace (anonymous)
316
325template<class OutputViewType,
326 class InputViewType>
327void
328copyConvert (const OutputViewType& dst,
329 const InputViewType& src)
330{
331 static_assert (Kokkos::is_view<OutputViewType>::value,
332 "OutputViewType must be a Kokkos::View.");
333 static_assert (Kokkos::is_view<InputViewType>::value,
334 "InputViewType must be a Kokkos::View.");
335 static_assert (std::is_same<typename OutputViewType::value_type,
336 typename OutputViewType::non_const_value_type>::value,
337 "OutputViewType must be a nonconst Kokkos::View.");
338 static_assert (static_cast<int> (OutputViewType::rank) ==
339 static_cast<int> (InputViewType::rank),
340 "src and dst must have the same rank.");
341
342 if (dst.extent (0) != src.extent (0)) {
343 std::ostringstream os;
344 os << "Tpetra::Details::copyConvert: "
345 << "dst.extent(0) = " << dst.extent (0)
346 << " != src.extent(0) = " << src.extent (0)
347 << ".";
348 throw std::invalid_argument (os.str ());
349 }
350 if (static_cast<int> (OutputViewType::rank) > 1 &&
351 dst.extent (1) != src.extent (1)) {
352 std::ostringstream os;
353 os << "Tpetra::Details::copyConvert: "
354 << "dst.extent(1) = " << dst.extent (1)
355 << " != src.extent(1) = " << src.extent (1)
356 << ".";
357 throw std::invalid_argument (os.str ());
358 }
359
360 // Canonicalize the View types in order to avoid redundant instantiations.
361 using output_view_type =
362 Kokkos::View<typename OutputViewType::non_const_data_type,
363 typename OutputViewType::array_layout,
364 typename OutputViewType::device_type>;
365 using input_view_type =
366 Kokkos::View<typename InputViewType::const_data_type,
367 typename InputViewType::array_layout,
368 typename InputViewType::device_type>;
369 CopyConvertImpl<output_view_type, input_view_type>::run (dst, src);
370}
371
372} // namespace Details
373} // namespace Tpetra
374
375#endif // TPETRA_DETAILS_COPYCONVERT_HPP
Nonmember function that computes a residual. Computes R = B - A * X.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length....
Namespace Tpetra contains the class and methods constituting the Tpetra library.