Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
11#define TPETRA_DETAILS_COPYOFFSETS_HPP
12
17
18#include "TpetraCore_config.h"
20#include "Kokkos_Core.hpp"
21#include <limits>
22#include <type_traits>
23
24namespace Tpetra {
25namespace Details {
26
27//
28// Implementation details for copyOffsets (see below).
29// Users should skip over this anonymous namespace.
30//
31namespace { // (anonymous)
32
// Implementation detail of copyOffsets (see below).  Compile-time
// predicate: value is true only when assigning ANY InputType value
// to an OutputType is guaranteed to preserve that value.
//
// Both types are assumed to be built-in integers.  The rules are:
//
//   - Same signedness: safe when the output is at least as wide as
//     the input.
//   - Unsigned input, signed output: safe only when the output is
//     strictly wider, since the sign bit costs one bit of range.
//   - Signed input, unsigned output: never safe, because a negative
//     input has no unsigned representation, whatever the widths.
//
// This is exactly the complement of OverflowChecker::could_overflow,
// so the unchecked copy functor is chosen only when the checked one
// would have nothing to check.
template<class OutputType, class InputType>
struct OutputCanFitInput {
private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed = std::is_signed<InputType>::value;

  static constexpr bool same_signedness = (output_signed == input_signed);
  static constexpr bool output_at_least_as_wide =
    sizeof (OutputType) >= sizeof (InputType);
  static constexpr bool output_strictly_wider =
    sizeof (OutputType) > sizeof (InputType);

public:
  static constexpr bool value =
    (same_signedness && output_at_least_as_wide) ||
    (output_signed && ! input_signed && output_strictly_wider);
};
50
51 // Avoid warnings for "unsigned integer < 0" comparisons.
52 template<class InputType,
53 bool input_signed = std::is_signed<InputType>::value>
54 struct Negative {};
55
56 template<class InputType>
57 struct Negative<InputType, true> {
58 static KOKKOS_INLINE_FUNCTION bool
59 negative (const InputType src) {
60 return src < InputType (0);
61 }
62 };
63
64 template<class InputType>
65 struct Negative<InputType, false> {
66 static KOKKOS_INLINE_FUNCTION bool
67 negative (const InputType /* src */) {
68 return false;
69 }
70 };
71
72 template<class InputType>
73 KOKKOS_INLINE_FUNCTION bool negative (const InputType src) {
74 return Negative<InputType>::negative (src);
75 }
76
77 template<class OutputType, class InputType>
78 struct OverflowChecker {
79 private:
80 static constexpr bool output_signed = std::is_signed<OutputType>::value;
81 static constexpr bool input_signed = std::is_signed<InputType>::value;
82
83 public:
84 // 1. Signed to unsigned could overflow due to negative numbers.
85 // 2. Larger to smaller could overflow.
86 // 3. Same size but unsigned to signed could overflow.
87 static constexpr bool could_overflow =
88 (! output_signed && input_signed) ||
89 (sizeof (OutputType) < sizeof (InputType)) ||
90 (sizeof (OutputType) == sizeof (InputType) &&
91 output_signed && ! input_signed);
92
93 KOKKOS_INLINE_FUNCTION bool
94 overflows (const InputType src) const
95 {
96 if (! could_overflow) {
97 return false;
98 }
99 else {
100 // Signed to unsigned could overflow due to negative numbers.
101 if (! output_signed && input_signed) {
102 return negative (src);
103 }
104 // We're only comparing InputType with InputType here, so this
105 // should not emit warnings.
106 return src < minDstVal_ || src > maxDstVal_;
107 }
108 }
109
110 private:
111 // If InputType is unsigned and OutputType is signed, casting max
112 // OutputType to InputType could overflow. See #5548.
113 InputType minDstVal_ = input_signed ?
114 std::numeric_limits<OutputType>::min () : OutputType (0);
115 InputType maxDstVal_ = std::numeric_limits<OutputType>::max ();
116 };
117
118
119 template<class OutputViewType, class InputViewType>
120 void
121 errorIfOverflow (const OutputViewType& dst,
122 const InputViewType& src,
123 const size_t overflowCount)
124 {
125 if (overflowCount == 0) {
126 return;
127 }
128
129 std::ostringstream os;
130 const bool plural = overflowCount != size_t (1);
131 os << "copyOffsets: " << overflowCount << " value" <<
132 (plural ? "s" : "") << " in src were too big (in the "
133 "sense of integer overflow) to fit in dst.";
134
135 const bool verbose = Details::Behavior::verbose ();
136 if (verbose) {
137 const size_t maxNumToPrint =
139 const size_t srcLen (src.extent (0));
140 if (srcLen <= maxNumToPrint) {
141 auto dst_h = Kokkos::create_mirror_view (dst);
142 auto src_h = Kokkos::create_mirror_view (src);
143 // DEEP_COPY REVIEW - NOT TESTED
144 Kokkos::deep_copy (src_h, src);
145 // DEEP_COPY REVIEW - NOT TESTED
146 Kokkos::deep_copy (dst_h, dst);
147
148 os << " src: [";
149 for (size_t k = 0; k < srcLen; ++k) {
150 os << src_h[k];
151 if (k + size_t (1) < srcLen) {
152 os << ", ";
153 }
154 }
155 os << "], ";
156
157 os << " dst: [";
158 for (size_t k = 0; k < srcLen; ++k) {
159 os << dst_h[k];
160 if (k + size_t (1) < srcLen) {
161 os << ", ";
162 }
163 }
164 os << "].";
165 }
166 else {
167 os << " src.extent(0) > " << maxNumToPrint << ", Tpetra's "
168 "verbose print count threshold. To increase this, set the "
169 "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
170 "to the desired threshold and rerun. You do NOT need to "
171 "rebuild Trilinos.";
172 }
173 }
174 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str ());
175 }
176
177 // Implementation detail of copyOffsets (see below).
178 //
179 // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
180 // Tpetra::Details::FixedHashTable uses this in its "copy"
181 // constructor for converting between different Device types. All
182 // the action happens in the partial specializations for different
183 // values of outputCanFitInput. "Output can fit input" means that
184 // casting the input's value type to the output's value type will
185 // never result in integer overflow.
186 template<class OutputViewType,
187 class InputViewType,
188 const bool outputCanFitInput =
189 OutputCanFitInput<typename OutputViewType::non_const_value_type,
190 typename InputViewType::non_const_value_type>::value>
191 class CopyOffsetsFunctor {};
192
193 // Specialization for when overflow is possible.
194 template<class OutputViewType, class InputViewType>
195 class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
196 public:
197 using execution_space = typename OutputViewType::execution_space;
198 using size_type = typename OutputViewType::size_type;
199 using value_type = size_t;
200
201 using input_value_type = typename InputViewType::non_const_value_type;
202 using output_value_type = typename OutputViewType::non_const_value_type;
203
204 CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
205 dst_ (dst), src_ (src)
206 {
207 static_assert (Kokkos::SpaceAccessibility<
208 typename OutputViewType::memory_space,
209 typename InputViewType::memory_space>::accessible,
210 "CopyOffsetsFunctor (implements copyOffsets): Output "
211 "View's space must be able to access the input View's "
212 "memory space.");
213 }
214
215 KOKKOS_INLINE_FUNCTION void
216 operator () (const size_type i, value_type& overflowCount) const {
217 const input_value_type src_i = src_(i);
218 if (checker_.overflows (src_i)) {
219 ++overflowCount;
220 }
221 dst_(i) = static_cast<output_value_type> (src_i);
222 }
223
224 KOKKOS_INLINE_FUNCTION void
225 operator () (const size_type i) const {
226 const input_value_type src_i = src_(i);
227 dst_(i) = static_cast<output_value_type> (src_i);
228 }
229
230 KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
231 overflowCount = 0;
232 }
233
234 KOKKOS_INLINE_FUNCTION void
235 join (value_type& result,
236 const value_type& current) const {
237 result += current;
238 }
239
240 private:
241 OutputViewType dst_;
242 InputViewType src_;
243 OverflowChecker<output_value_type, input_value_type> checker_;
244 };
245
246 // Specialization for when overflow is impossible.
247 template<class OutputViewType, class InputViewType>
248 class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
249 public:
250 using execution_space = typename OutputViewType::execution_space;
251 using size_type = typename OutputViewType::size_type;
252 using value_type = size_t;
253
254 CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
255 dst_ (dst),
256 src_ (src)
257 {
258 static_assert (Kokkos::SpaceAccessibility<
259 typename OutputViewType::memory_space,
260 typename InputViewType::memory_space>::accessible,
261 "CopyOffsetsFunctor (implements copyOffsets): Output "
262 "View's space must be able to access the input View's "
263 "memory space.");
264 }
265
266 KOKKOS_INLINE_FUNCTION void
267 operator () (const size_type i, value_type& /* overflowCount */) const {
268 // Overflow is impossible in this case, so there's no need to check.
269 dst_(i) = src_(i);
270 }
271
272 KOKKOS_INLINE_FUNCTION void
273 operator () (const size_type i) const {
274 dst_(i) = src_(i);
275 }
276
277 KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
278 overflowCount = 0;
279 }
280
281 KOKKOS_INLINE_FUNCTION void
282 join (value_type& /* result */,
283 const value_type& /* current */) const
284 {}
285
286 private:
287 OutputViewType dst_;
288 InputViewType src_;
289 };
290
291 // Implementation detail of copyOffsets (see below).
292 //
293 // We specialize copyOffsets on two different conditions:
294 //
295 // 1. Are the two Views' layouts the same, and do the input and
296 // output Views have the same value type?
297 // 2. Can the output View's execution space access the input View's
298 // memory space?
299 //
300 // If (1) is true, that makes the implementation simple: just call
301 // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
302 // matter the device type). Otherwise, we need a custom copy
303 // functor. If (2) is true, then we can use CopyOffsetsFunctor
304 // directly. Otherwise, we have to copy the input View into the
305 // output View's memory space, before we can use the functor.
306 //
307 template<class OutputViewType,
308 class InputViewType,
309 const bool sameLayoutsSameOffsetTypes =
310 std::is_same<typename OutputViewType::array_layout,
311 typename InputViewType::array_layout>::value &&
312 std::is_same<typename OutputViewType::non_const_value_type,
313 typename InputViewType::non_const_value_type>::value,
314 const bool outputExecSpaceCanAccessInputMemSpace =
315 Kokkos::SpaceAccessibility<
316 typename OutputViewType::memory_space,
317 typename InputViewType::memory_space>::accessible>
318 struct CopyOffsetsImpl {
319 static void run (const OutputViewType& dst, const InputViewType& src);
320 };
321
322 // Specialization for sameLayoutsSameOffsetTypes = true:
323 //
324 // If both input and output Views have the same layout, and both
325 // input and output use the same type for offsets, then we don't
326 // need to check for overflow, and we can use Kokkos::deep_copy
327 // directly. It doesn't matter whether the output execution space
328 // can access the input memory space: Kokkos::deep_copy takes care
329 // of the details.
330 template<class OutputViewType,
331 class InputViewType,
332 const bool outputExecSpaceCanAccessInputMemSpace>
333 struct CopyOffsetsImpl<OutputViewType, InputViewType,
334 true, outputExecSpaceCanAccessInputMemSpace> {
335 static void run (const OutputViewType& dst, const InputViewType& src) {
336 static_assert (std::is_same<typename OutputViewType::non_const_value_type,
337 typename InputViewType::non_const_value_type>::value,
338 "CopyOffsetsImpl (implementation of copyOffsets): In order"
339 " to call this specialization, the input and output must "
340 "use the same offset type.");
341 static_assert (static_cast<int> (OutputViewType::rank) ==
342 static_cast<int> (InputViewType::rank),
343 "CopyOffsetsImpl (implementation of copyOffsets): In order"
344 " to call this specialization, src and dst must have the "
345 "same rank.");
346 static_assert (std::is_same<typename OutputViewType::array_layout,
347 typename InputViewType::array_layout>::value,
348 "CopyOffsetsImpl (implementation of copyOffsets): In order"
349 " to call this specialization, src and dst must have the "
350 "the same array_layout.");
351 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
352 using execution_space = typename OutputViewType::execution_space;
353 Kokkos::deep_copy (execution_space(), dst, src);
354 }
355 };
356
357 // Specializations for sameLayoutsSameOffsetTypes = false:
358 //
359 // If input and output don't have the same layout, or use different
360 // types for offsets, then we can't use Kokkos::deep_copy directly,
361 // and we may have to check for overflow.
362
363 // Specialization for sameLayoutsSameOffsetTypes = false and
364 // outputExecSpaceCanAccessInputMemSpace = true:
365 //
366 // If the output execution space can access the input memory space,
367 // then we can use CopyOffsetsFunctor directly.
368 template<class OutputViewType,
369 class InputViewType>
370 struct CopyOffsetsImpl<OutputViewType, InputViewType,
371 false, true> {
372 static void run (const OutputViewType& dst, const InputViewType& src) {
373 static_assert (static_cast<int> (OutputViewType::rank) ==
374 static_cast<int> (InputViewType::rank),
375 "CopyOffsetsImpl (implementation of copyOffsets): "
376 "src and dst must have the same rank.");
377 constexpr bool sameLayoutsSameOffsetTypes =
378 std::is_same<typename OutputViewType::array_layout,
379 typename InputViewType::array_layout>::value &&
380 std::is_same<typename OutputViewType::non_const_value_type,
381 typename InputViewType::non_const_value_type>::value;
382 static_assert (! sameLayoutsSameOffsetTypes,
383 "CopyOffsetsImpl (implements copyOffsets): In order to "
384 "call this specialization, sameLayoutsSameOffsetTypes "
385 "must be false. That is, either the input and output "
386 "must have different array layouts, or their value types "
387 "must differ.");
388 static_assert (Kokkos::SpaceAccessibility<
389 typename OutputViewType::memory_space,
390 typename InputViewType::memory_space>::accessible,
391 "CopyOffsetsImpl (implements copyOffsets): In order to "
392 "call this specialization, the output View's space must "
393 "be able to access the input View's memory space.");
394 using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
395 using execution_space = typename OutputViewType::execution_space;
396 using size_type = typename OutputViewType::size_type;
397 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
398
399 const bool debug = Details::Behavior::debug ();
400 if (debug) {
401 size_t overflowCount = 0; // output argument of the reduction
402 Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
403 range_type (0, dst.extent (0)),
404 functor_type (dst, src),
405 overflowCount);
406 errorIfOverflow (dst, src, overflowCount);
407 }
408 else {
409 Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
410 range_type (0, dst.extent (0)),
411 functor_type (dst, src));
412 }
413 }
414 };
415
416 // Specialization for sameLayoutsSameOffsetTypes = false and
417 // outputExecSpaceCanAccessInputMemSpace = false.
418 //
419 // If the output execution space canNOT access the input memory
420 // space, then we can't use CopyOffsetsFunctor directly. Instead,
421 // tell Kokkos to copy the input View's data into the output View's
422 // memory space _first_. Since the offset types are different for
423 // this specialization, we can't just call Kokkos::deep_copy
424 // directly between the input and output Views of offsets; that
425 // wouldn't compile.
426 //
427 // This case can and does come up in practice: If the output View's
428 // execution space is Cuda, it cannot currently access host memory
429 // (that's the opposite direction from what UVM allows).
430 // Furthermore, that case specifically requires overflow checking,
431 // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
432 // offset type than Kokkos' host spaces.
433 template<class OutputViewType, class InputViewType>
434 struct CopyOffsetsImpl<OutputViewType, InputViewType,
435 false, false> {
436 static void run (const OutputViewType& dst, const InputViewType& src) {
437 static_assert (static_cast<int> (OutputViewType::rank) ==
438 static_cast<int> (InputViewType::rank),
439 "CopyOffsetsImpl (implementation of copyOffsets): In order"
440 " to call this specialization, src and dst must have the "
441 "same rank.");
442 constexpr bool sameLayoutsSameOffsetTypes =
443 std::is_same<typename OutputViewType::array_layout,
444 typename InputViewType::array_layout>::value &&
445 std::is_same<typename OutputViewType::non_const_value_type,
446 typename InputViewType::non_const_value_type>::value;
447 static_assert (! sameLayoutsSameOffsetTypes,
448 "CopyOffsetsImpl (implements copyOffsets): In order to "
449 "call this specialization, sameLayoutsSameOffsetTypes "
450 "must be false. That is, either the input and output "
451 "must have different array layouts, or their value types "
452 "must differ.");
453 using output_space_copy_type =
454 Kokkos::View<typename InputViewType::non_const_value_type*,
455 Kokkos::LayoutLeft, typename OutputViewType::device_type>;
456 using Kokkos::view_alloc;
457 using Kokkos::WithoutInitializing;
458 using execution_space = typename OutputViewType::execution_space;
459 output_space_copy_type
460 outputSpaceCopy (view_alloc ("outputSpace", WithoutInitializing),
461 src.extent (0));
462 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
463 Kokkos::deep_copy (execution_space(), outputSpaceCopy, src);
464
465 // The output View's execution space can access
466 // outputSpaceCopy's data, so we can run the functor now.
467 using functor_type =
468 CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
469 using size_type = typename OutputViewType::size_type;
470 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
471
472 const bool debug = Details::Behavior::debug ();
473 if (debug) {
474 size_t overflowCount = 0;
475 Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
476 range_type (0, dst.extent (0)),
477 functor_type (dst, outputSpaceCopy),
478 overflowCount);
479 errorIfOverflow (dst, src, overflowCount);
480 }
481 else {
482 Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
483 range_type (0, dst.extent (0)),
484 functor_type (dst, outputSpaceCopy));
485 }
486 }
487 };
488} // namespace (anonymous)
489
501template<class OutputViewType, class InputViewType>
502void
503copyOffsets (const OutputViewType& dst, const InputViewType& src)
504{
505 static_assert (Kokkos::is_view<OutputViewType>::value,
506 "OutputViewType (the type of dst) must be a Kokkos::View.");
507 static_assert (Kokkos::is_view<InputViewType>::value,
508 "InputViewType (the type of src) must be a Kokkos::View.");
509 static_assert (std::is_same<typename OutputViewType::value_type,
510 typename OutputViewType::non_const_value_type>::value,
511 "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
512 static_assert (static_cast<int> (OutputViewType::rank) == 1,
513 "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
514 static_assert (static_cast<int> (InputViewType::rank) == 1,
515 "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
516 static_assert (std::is_integral<typename std::decay<decltype (dst(0)) >::type>::value,
517 "The entries of dst must be built-in integers.");
518 static_assert (std::is_integral<typename std::decay<decltype (src(0)) >::type>::value,
519 "The entries of src must be built-in integers.");
520
521 TEUCHOS_TEST_FOR_EXCEPTION
522 (dst.extent (0) != src.extent (0), std::invalid_argument,
523 "copyOffsets: dst.extent(0) = " << dst.extent (0)
524 << " != src.extent(0) = " << src.extent (0) << ".");
525
526 CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
527}
528
529} // namespace Details
530} // namespace Tpetra
531
532#endif // TPETRA_DETAILS_COPYOFFSETS_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Nonmember function that computes a residual Computes R = B - A * X.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
Namespace Tpetra contains the class and methods constituting the Tpetra library.