Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// clang-format off
11
12
13// mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
14// incorrect (as() is not a device function) and usually irrelevant
15// (it would only matter if LocalOrdinal were bigger than size_t on a
16// particular platform, which is unlikely).
17
18// KDD Aug 2020: In the permute/pack/unpack functors,
19// the Enabled template parameter is specialized in
20// downstream packages like Stokhos using SFINAE to provide partial
21// specializations based on the scalar type of the SrcView and DstView
22// template parameters. See #7898.
23// Do not remove it before checking with Stokhos and other specializing users.
24
25#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
26#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
27
28#include "Kokkos_Core.hpp"
29#include "Kokkos_ArithTraits.hpp"
30#include <sstream>
31#include <stdexcept>
32
33namespace Tpetra {
34namespace KokkosRefactor {
35namespace Details {
36
42namespace Impl {
43
50template<class IntegerType,
51 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
53 static KOKKOS_INLINE_FUNCTION bool
54 test (const IntegerType x,
55 const IntegerType exclusiveUpperBound);
56};
57
58// Partial specialization for the case where IntegerType IS signed.
59template<class IntegerType>
60struct OutOfBounds<IntegerType, true> {
61 static KOKKOS_INLINE_FUNCTION bool
62 test (const IntegerType x,
63 const IntegerType exclusiveUpperBound)
64 {
65 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
66 }
67};
68
69// Partial specialization for the case where IntegerType is NOT signed.
70template<class IntegerType>
71struct OutOfBounds<IntegerType, false> {
72 static KOKKOS_INLINE_FUNCTION bool
73 test (const IntegerType x,
74 const IntegerType exclusiveUpperBound)
75 {
76 return x >= exclusiveUpperBound;
77 }
78};
79
82template<class IntegerType>
83KOKKOS_INLINE_FUNCTION bool
84outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
85{
86 return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
87}
88
89} // namespace Impl
90
91 // Functors for implementing packAndPrepare and unpackAndCombine
92 // through parallel_for
93
94 template <typename DstView, typename SrcView, typename IdxView,
95 typename Enabled = void>
96 struct PackArraySingleColumn {
97 typedef typename DstView::execution_space execution_space;
98 typedef typename execution_space::size_type size_type;
99
100 DstView dst;
101 SrcView src;
102 IdxView idx;
103 size_t col;
104
105 PackArraySingleColumn (const DstView& dst_,
106 const SrcView& src_,
107 const IdxView& idx_,
108 const size_t col_) :
109 dst(dst_), src(src_), idx(idx_), col(col_) {}
110
111 KOKKOS_INLINE_FUNCTION void
112 operator() (const size_type k) const {
113 dst(k) = src(idx(k), col);
114 }
115
116 static void
117 pack (const DstView& dst,
118 const SrcView& src,
119 const IdxView& idx,
120 const size_t col,
121 const execution_space &space)
122 {
123 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
124 Kokkos::parallel_for
125 ("Tpetra::MultiVector pack one col",
126 range_type (space, 0, idx.size ()),
127 PackArraySingleColumn (dst, src, idx, col));
128 }
129 };
130
131 template <typename DstView,
132 typename SrcView,
133 typename IdxView,
134 typename SizeType = typename DstView::execution_space::size_type,
135 typename Enabled = void>
136 class PackArraySingleColumnWithBoundsCheck {
137 private:
138 static_assert (Kokkos::is_view<DstView>::value,
139 "DstView must be a Kokkos::View.");
140 static_assert (Kokkos::is_view<SrcView>::value,
141 "SrcView must be a Kokkos::View.");
142 static_assert (Kokkos::is_view<IdxView>::value,
143 "IdxView must be a Kokkos::View.");
144 static_assert (static_cast<int> (DstView::rank) == 1,
145 "DstView must be a rank-1 Kokkos::View.");
146 static_assert (static_cast<int> (SrcView::rank) == 2,
147 "SrcView must be a rank-2 Kokkos::View.");
148 static_assert (static_cast<int> (IdxView::rank) == 1,
149 "IdxView must be a rank-1 Kokkos::View.");
150 static_assert (std::is_integral<SizeType>::value,
151 "SizeType must be a built-in integer type.");
152
153 using execution_space = typename DstView::execution_space;
154
155 public:
156 typedef SizeType size_type;
157 using value_type = size_t;
158
159 private:
160 DstView dst;
161 SrcView src;
162 IdxView idx;
163 size_type col;
164 execution_space space;
165
166 public:
167 PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
168 const SrcView& src_,
169 const IdxView& idx_,
170 const size_type col_) :
171 dst (dst_), src (src_), idx (idx_), col (col_) {}
172
173 KOKKOS_INLINE_FUNCTION void
174 operator() (const size_type k, value_type& lclErrCount) const {
175 using index_type = typename IdxView::non_const_value_type;
176
177 const index_type lclRow = idx(k);
178 if (lclRow < static_cast<index_type> (0) ||
179 lclRow >= static_cast<index_type> (src.extent (0))) {
180 ++lclErrCount;
181 }
182 else {
183 dst(k) = src(lclRow, col);
184 }
185 }
186
187 KOKKOS_INLINE_FUNCTION
188 void init (value_type& initialErrorCount) const {
189 initialErrorCount = 0;
190 }
191
192 KOKKOS_INLINE_FUNCTION void
193 join (value_type& dstErrorCount,
194 const value_type& srcErrorCount) const
195 {
196 dstErrorCount += srcErrorCount;
197 }
198
199 static void
200 pack (const DstView& dst,
201 const SrcView& src,
202 const IdxView& idx,
203 const size_type col,
204 const execution_space &space)
205 {
206 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
207 typedef typename IdxView::non_const_value_type index_type;
208
209 size_t errorCount = 0;
210 Kokkos::parallel_reduce
211 ("Tpetra::MultiVector pack one col debug only",
212 range_type (space, 0, idx.size ()),
213 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
214 errorCount);
215
216 if (errorCount != 0) {
217 // Go back and find the out-of-bounds entries in the index
218 // array. Performance doesn't matter since we are already in
219 // an error state, so we can do this sequentially, on host.
220 auto idx_h = Kokkos::create_mirror_view (idx);
221
222 // DEEP_COPY REVIEW - NOT TESTED
223 Kokkos::deep_copy (idx_h, idx);
224
225 std::vector<index_type> badIndices;
226 const size_type numInds = idx_h.extent (0);
227 for (size_type k = 0; k < numInds; ++k) {
228 if (idx_h(k) < static_cast<index_type> (0) ||
229 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
230 badIndices.push_back (idx_h(k));
231 }
232 }
233
234 TEUCHOS_TEST_FOR_EXCEPTION
235 (errorCount != badIndices.size (), std::logic_error,
236 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
237 << " != badIndices.size() = " << badIndices.size () << ". This sho"
238 "uld never happen. Please report this to the Tpetra developers.");
239
240 std::ostringstream os;
241 os << "MultiVector single-column pack kernel had "
242 << badIndices.size () << " out-of bounds index/ices. "
243 "Here they are: [";
244 for (size_t k = 0; k < badIndices.size (); ++k) {
245 os << badIndices[k];
246 if (k + 1 < badIndices.size ()) {
247 os << ", ";
248 }
249 }
250 os << "].";
251 throw std::runtime_error (os.str ());
252 }
253 }
254 };
255
256
257 template <typename DstView, typename SrcView, typename IdxView>
258 void
259 pack_array_single_column (const DstView& dst,
260 const SrcView& src,
261 const IdxView& idx,
262 const size_t col,
263 const bool debug,
264 const typename DstView::execution_space &space)
265 {
266 static_assert (Kokkos::is_view<DstView>::value,
267 "DstView must be a Kokkos::View.");
268 static_assert (Kokkos::is_view<SrcView>::value,
269 "SrcView must be a Kokkos::View.");
270 static_assert (Kokkos::is_view<IdxView>::value,
271 "IdxView must be a Kokkos::View.");
272 static_assert (static_cast<int> (DstView::rank) == 1,
273 "DstView must be a rank-1 Kokkos::View.");
274 static_assert (static_cast<int> (SrcView::rank) == 2,
275 "SrcView must be a rank-2 Kokkos::View.");
276 static_assert (static_cast<int> (IdxView::rank) == 1,
277 "IdxView must be a rank-1 Kokkos::View.");
278
279 using execution_space = typename DstView::execution_space;
280
281 static_assert (Kokkos::SpaceAccessibility<execution_space,
282 typename DstView::memory_space>::accessible,
283 "DstView not accessible from execution space");
284 static_assert (Kokkos::SpaceAccessibility<execution_space,
285 typename SrcView::memory_space>::accessible,
286 "SrcView not accessible from execution space");
287 static_assert (Kokkos::SpaceAccessibility<execution_space,
288 typename IdxView::memory_space>::accessible,
289 "IdxView not accessible from execution space");
290
291 if (debug) {
292 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
293 impl_type::pack (dst, src, idx, col, space);
294 }
295 else {
296 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
297 impl_type::pack (dst, src, idx, col, space);
298 }
299 }
300
303 template <typename DstView, typename SrcView, typename IdxView>
304 void
305 pack_array_single_column (const DstView& dst,
306 const SrcView& src,
307 const IdxView& idx,
308 const size_t col,
309 const bool debug = true)
310 {
311 pack_array_single_column(dst, src, idx, col, debug, typename DstView::execution_space());
312 }
313
314 template <typename DstView, typename SrcView, typename IdxView,
315 typename Enabled = void>
316 struct PackArrayMultiColumn {
317 using execution_space = typename DstView::execution_space;
318 typedef typename execution_space::size_type size_type;
319
320 DstView dst;
321 SrcView src;
322 IdxView idx;
323 size_t numCols;
324
325 PackArrayMultiColumn (const DstView& dst_,
326 const SrcView& src_,
327 const IdxView& idx_,
328 const size_t numCols_) :
329 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
330
331 KOKKOS_INLINE_FUNCTION void
332 operator() (const size_type k) const {
333 const typename IdxView::value_type localRow = idx(k);
334 const size_t offset = k*numCols;
335 for (size_t j = 0; j < numCols; ++j) {
336 dst(offset + j) = src(localRow, j);
337 }
338 }
339
340 static void pack(const DstView& dst,
341 const SrcView& src,
342 const IdxView& idx,
343 size_t numCols,
344 const execution_space &space) {
345 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
346 Kokkos::parallel_for
347 ("Tpetra::MultiVector pack multicol const stride",
348 range_type (space, 0, idx.size ()),
349 PackArrayMultiColumn (dst, src, idx, numCols));
350 }
351 };
352
353 template <typename DstView,
354 typename SrcView,
355 typename IdxView,
356 typename SizeType = typename DstView::execution_space::size_type,
357 typename Enabled = void>
358 class PackArrayMultiColumnWithBoundsCheck {
359 public:
360 using size_type = SizeType;
361 using value_type = size_t;
362 using execution_space = typename DstView::execution_space;
363
364 private:
365 DstView dst;
366 SrcView src;
367 IdxView idx;
368 size_type numCols;
369
370 public:
371 PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
372 const SrcView& src_,
373 const IdxView& idx_,
374 const size_type numCols_) :
375 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
376
377 KOKKOS_INLINE_FUNCTION void
378 operator() (const size_type k, value_type& lclErrorCount) const {
379 typedef typename IdxView::non_const_value_type index_type;
380
381 const index_type lclRow = idx(k);
382 if (lclRow < static_cast<index_type> (0) ||
383 lclRow >= static_cast<index_type> (src.extent (0))) {
384 ++lclErrorCount; // failed
385 }
386 else {
387 const size_type offset = k*numCols;
388 for (size_type j = 0; j < numCols; ++j) {
389 dst(offset + j) = src(lclRow, j);
390 }
391 }
392 }
393
394 KOKKOS_INLINE_FUNCTION
395 void init (value_type& initialErrorCount) const {
396 initialErrorCount = 0;
397 }
398
399 KOKKOS_INLINE_FUNCTION void
400 join (value_type& dstErrorCount,
401 const value_type& srcErrorCount) const
402 {
403 dstErrorCount += srcErrorCount;
404 }
405
406 static void
407 pack (const DstView& dst,
408 const SrcView& src,
409 const IdxView& idx,
410 const size_type numCols,
411 const execution_space &space)
412 {
413 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
414 typedef typename IdxView::non_const_value_type index_type;
415
416 size_t errorCount = 0;
417 Kokkos::parallel_reduce
418 ("Tpetra::MultiVector pack multicol const stride debug only",
419 range_type (space, 0, idx.size ()),
420 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
421 errorCount);
422 if (errorCount != 0) {
423 // Go back and find the out-of-bounds entries in the index
424 // array. Performance doesn't matter since we are already in
425 // an error state, so we can do this sequentially, on host.
426 auto idx_h = Kokkos::create_mirror_view (idx);
427
428 // DEEP_COPY REVIEW - NOT TESTED
429 Kokkos::deep_copy (idx_h, idx);
430
431 std::vector<index_type> badIndices;
432 const size_type numInds = idx_h.extent (0);
433 for (size_type k = 0; k < numInds; ++k) {
434 if (idx_h(k) < static_cast<index_type> (0) ||
435 idx_h(k) >= static_cast<index_type> (src.extent (0))) {
436 badIndices.push_back (idx_h(k));
437 }
438 }
439
440 TEUCHOS_TEST_FOR_EXCEPTION
441 (errorCount != badIndices.size (), std::logic_error,
442 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
443 << " != badIndices.size() = " << badIndices.size () << ". This sho"
444 "uld never happen. Please report this to the Tpetra developers.");
445
446 std::ostringstream os;
447 os << "Tpetra::MultiVector multiple-column pack kernel had "
448 << badIndices.size () << " out-of bounds index/ices (errorCount = "
449 << errorCount << "): [";
450 for (size_t k = 0; k < badIndices.size (); ++k) {
451 os << badIndices[k];
452 if (k + 1 < badIndices.size ()) {
453 os << ", ";
454 }
455 }
456 os << "].";
457 throw std::runtime_error (os.str ());
458 }
459 }
460 };
461
462
463 template <typename DstView,
464 typename SrcView,
465 typename IdxView>
466 void
467 pack_array_multi_column (const DstView& dst,
468 const SrcView& src,
469 const IdxView& idx,
470 const size_t numCols,
471 const bool debug,
472 const typename DstView::execution_space &space)
473 {
474 static_assert (Kokkos::is_view<DstView>::value,
475 "DstView must be a Kokkos::View.");
476 static_assert (Kokkos::is_view<SrcView>::value,
477 "SrcView must be a Kokkos::View.");
478 static_assert (Kokkos::is_view<IdxView>::value,
479 "IdxView must be a Kokkos::View.");
480 static_assert (static_cast<int> (DstView::rank) == 1,
481 "DstView must be a rank-1 Kokkos::View.");
482 static_assert (static_cast<int> (SrcView::rank) == 2,
483 "SrcView must be a rank-2 Kokkos::View.");
484 static_assert (static_cast<int> (IdxView::rank) == 1,
485 "IdxView must be a rank-1 Kokkos::View.");
486
487 using execution_space = typename DstView::execution_space;
488
489 static_assert (Kokkos::SpaceAccessibility<execution_space,
490 typename DstView::memory_space>::accessible,
491 "DstView not accessible from execution space");
492 static_assert (Kokkos::SpaceAccessibility<execution_space,
493 typename SrcView::memory_space>::accessible,
494 "SrcView not accessible from execution space");
495 static_assert (Kokkos::SpaceAccessibility<execution_space,
496 typename IdxView::memory_space>::accessible,
497 "IdxView not accessible from execution space");
498
499 if (debug) {
500 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
501 SrcView, IdxView> impl_type;
502 impl_type::pack (dst, src, idx, numCols, space);
503 }
504 else {
505 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
506 impl_type::pack (dst, src, idx, numCols, space);
507 }
508 }
509
510 template <typename DstView,
511 typename SrcView,
512 typename IdxView>
513 void
514 pack_array_multi_column (const DstView& dst,
515 const SrcView& src,
516 const IdxView& idx,
517 const size_t numCols,
518 const bool debug = true) {
519 pack_array_multi_column(dst, src, idx, numCols, debug, typename DstView::execution_space());
520 }
521
522 template <typename DstView, typename SrcView, typename IdxView,
523 typename ColView, typename Enabled = void>
524 struct PackArrayMultiColumnVariableStride {
525 using execution_space = typename DstView::execution_space;
526 typedef typename execution_space::size_type size_type;
527
528 DstView dst;
529 SrcView src;
530 IdxView idx;
531 ColView col;
532 size_t numCols;
533
534 PackArrayMultiColumnVariableStride (const DstView& dst_,
535 const SrcView& src_,
536 const IdxView& idx_,
537 const ColView& col_,
538 const size_t numCols_) :
539 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
540
541 KOKKOS_INLINE_FUNCTION
542 void operator() (const size_type k) const {
543 const typename IdxView::value_type localRow = idx(k);
544 const size_t offset = k*numCols;
545 for (size_t j = 0; j < numCols; ++j) {
546 dst(offset + j) = src(localRow, col(j));
547 }
548 }
549
550 static void pack(const DstView& dst,
551 const SrcView& src,
552 const IdxView& idx,
553 const ColView& col,
554 size_t numCols,
555 const execution_space &space) {
556 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
557 Kokkos::parallel_for
558 ("Tpetra::MultiVector pack multicol var stride",
559 range_type (space, 0, idx.size ()),
560 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
561 }
562 };
563
564 template <typename DstView,
565 typename SrcView,
566 typename IdxView,
567 typename ColView,
568 typename SizeType = typename DstView::execution_space::size_type,
569 typename Enabled = void>
570 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
571 public:
572 using size_type = SizeType;
573 using value_type = size_t;
574 using execution_space = typename DstView::execution_space;
575
576 private:
577 DstView dst;
578 SrcView src;
579 IdxView idx;
580 ColView col;
581 size_type numCols;
582
583 public:
584 PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
585 const SrcView& src_,
586 const IdxView& idx_,
587 const ColView& col_,
588 const size_type numCols_) :
589 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
590
591 KOKKOS_INLINE_FUNCTION void
592 operator() (const size_type k, value_type& lclErrorCount) const {
593 typedef typename IdxView::non_const_value_type row_index_type;
594 typedef typename ColView::non_const_value_type col_index_type;
595
596 const row_index_type lclRow = idx(k);
597 if (lclRow < static_cast<row_index_type> (0) ||
598 lclRow >= static_cast<row_index_type> (src.extent (0))) {
599 ++lclErrorCount = 0;
600 }
601 else {
602 const size_type offset = k*numCols;
603 for (size_type j = 0; j < numCols; ++j) {
604 const col_index_type lclCol = col(j);
605 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
606 ++lclErrorCount = 0;
607 }
608 else { // all indices are valid; do the assignment
609 dst(offset + j) = src(lclRow, lclCol);
610 }
611 }
612 }
613 }
614
615 KOKKOS_INLINE_FUNCTION void
616 init (value_type& initialErrorCount) const {
617 initialErrorCount = 0;
618 }
619
620 KOKKOS_INLINE_FUNCTION void
621 join (value_type& dstErrorCount,
622 const value_type& srcErrorCount) const
623 {
624 dstErrorCount += srcErrorCount;
625 }
626
627 static void
628 pack (const DstView& dst,
629 const SrcView& src,
630 const IdxView& idx,
631 const ColView& col,
632 const size_type numCols,
633 const execution_space &space)
634 {
635 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
636 using row_index_type = typename IdxView::non_const_value_type;
637 using col_index_type = typename ColView::non_const_value_type;
638
639 size_t errorCount = 0;
640 Kokkos::parallel_reduce
641 ("Tpetra::MultiVector pack multicol var stride debug only",
642 range_type (space, 0, idx.size ()),
643 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
644 col, numCols),
645 errorCount);
646 if (errorCount != 0) {
647 constexpr size_t maxNumBadIndicesToPrint = 100;
648
649 std::ostringstream os; // for error reporting
650 os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
651 "found " << errorCount
652 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
653
654 // Go back and find any out-of-bounds entries in the array of
655 // row indices. Performance doesn't matter since we are already
656 // in an error state, so we can do this sequentially, on host.
657 auto idx_h = Kokkos::create_mirror_view (idx);
658
659 // DEEP_COPY REVIEW - NOT TESTED
660 Kokkos::deep_copy (idx_h, idx);
661
662 std::vector<row_index_type> badRows;
663 const size_type numRowInds = idx_h.extent (0);
664 for (size_type k = 0; k < numRowInds; ++k) {
665 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
666 badRows.push_back (idx_h(k));
667 }
668 }
669
670 if (badRows.size () != 0) {
671 os << badRows.size () << " out-of-bounds row ind"
672 << (badRows.size () != size_t (1) ? "ices" : "ex");
673 if (badRows.size () <= maxNumBadIndicesToPrint) {
674 os << ": [";
675 for (size_t k = 0; k < badRows.size (); ++k) {
676 os << badRows[k];
677 if (k + 1 < badRows.size ()) {
678 os << ", ";
679 }
680 }
681 os << "]. ";
682 }
683 else {
684 os << ". ";
685 }
686 }
687 else {
688 os << "No out-of-bounds row indices. ";
689 }
690
691 // Go back and find any out-of-bounds entries in the array
692 // of column indices.
693 auto col_h = Kokkos::create_mirror_view (col);
694
695 // DEEP_COPY REVIEW - NOT TESTED
696 Kokkos::deep_copy (col_h, col);
697
698 std::vector<col_index_type> badCols;
699 const size_type numColInds = col_h.extent (0);
700 for (size_type k = 0; k < numColInds; ++k) {
701 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
702 badCols.push_back (col_h(k));
703 }
704 }
705
706 if (badCols.size () != 0) {
707 os << badCols.size () << " out-of-bounds column ind"
708 << (badCols.size () != size_t (1) ? "ices" : "ex");
709 if (badCols.size () <= maxNumBadIndicesToPrint) {
710 os << ": [";
711 for (size_t k = 0; k < badCols.size (); ++k) {
712 os << badCols[k];
713 if (k + 1 < badCols.size ()) {
714 os << ", ";
715 }
716 }
717 os << "]. ";
718 }
719 else {
720 os << ". ";
721 }
722 }
723 else {
724 os << "No out-of-bounds column indices. ";
725 }
726
727 TEUCHOS_TEST_FOR_EXCEPTION
728 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
729 std::logic_error, "Tpetra::MultiVector variable stride pack "
730 "kernel reports errorCount=" << errorCount << ", but we failed "
731 "to find any bad rows or columns. This should never happen. "
732 "Please report this to the Tpetra developers.");
733
734 throw std::runtime_error (os.str ());
735 } // hasErr
736 }
737 };
738
739 template <typename DstView,
740 typename SrcView,
741 typename IdxView,
742 typename ColView>
743 void
744 pack_array_multi_column_variable_stride (const DstView& dst,
745 const SrcView& src,
746 const IdxView& idx,
747 const ColView& col,
748 const size_t numCols,
749 const bool debug,
750 const typename DstView::execution_space &space)
751 {
752 static_assert (Kokkos::is_view<DstView>::value,
753 "DstView must be a Kokkos::View.");
754 static_assert (Kokkos::is_view<SrcView>::value,
755 "SrcView must be a Kokkos::View.");
756 static_assert (Kokkos::is_view<IdxView>::value,
757 "IdxView must be a Kokkos::View.");
758 static_assert (Kokkos::is_view<ColView>::value,
759 "ColView must be a Kokkos::View.");
760 static_assert (static_cast<int> (DstView::rank) == 1,
761 "DstView must be a rank-1 Kokkos::View.");
762 static_assert (static_cast<int> (SrcView::rank) == 2,
763 "SrcView must be a rank-2 Kokkos::View.");
764 static_assert (static_cast<int> (IdxView::rank) == 1,
765 "IdxView must be a rank-1 Kokkos::View.");
766 static_assert (static_cast<int> (ColView::rank) == 1,
767 "ColView must be a rank-1 Kokkos::View.");
768
769 using execution_space = typename DstView::execution_space;
770
771 static_assert (Kokkos::SpaceAccessibility<execution_space,
772 typename DstView::memory_space>::accessible,
773 "DstView not accessible from execution space");
774 static_assert (Kokkos::SpaceAccessibility<execution_space,
775 typename SrcView::memory_space>::accessible,
776 "SrcView not accessible from execution space");
777 static_assert (Kokkos::SpaceAccessibility<execution_space,
778 typename IdxView::memory_space>::accessible,
779 "IdxView not accessible from execution space");
780
781 if (debug) {
782 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
783 SrcView, IdxView, ColView> impl_type;
784 impl_type::pack (dst, src, idx, col, numCols, space);
785 }
786 else {
787 typedef PackArrayMultiColumnVariableStride<DstView,
788 SrcView, IdxView, ColView> impl_type;
789 impl_type::pack (dst, src, idx, col, numCols, space);
790 }
791 }
792
793 template <typename DstView,
794 typename SrcView,
795 typename IdxView,
796 typename ColView>
797 void
798 pack_array_multi_column_variable_stride (const DstView& dst,
799 const SrcView& src,
800 const IdxView& idx,
801 const ColView& col,
802 const size_t numCols,
803 const bool debug = true) {
804 pack_array_multi_column_variable_stride(dst, src, idx, col, numCols, debug,
805 typename DstView::execution_space());
806 }
807
808 // Tag types to indicate whether to use atomic updates in the
809 // various CombineMode "Op"s.
810 struct atomic_tag {};
811 struct nonatomic_tag {};
812
813 struct AddOp {
814 template<class SC>
815 KOKKOS_INLINE_FUNCTION
816 void operator() (atomic_tag, SC& dest, const SC& src) const {
817 Kokkos::atomic_add (&dest, src);
818 }
819
820 template<class SC>
821 KOKKOS_INLINE_FUNCTION
822 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
823 dest += src;
824 }
825 };
826
827 struct InsertOp {
828 // There's no point to using Kokkos::atomic_assign for the REPLACE
829 // or INSERT CombineModes, since this is not a well-defined
830 // reduction for MultiVector anyway. See GitHub Issue #4417
831 // (which this fixes).
832 template<class SC>
833 KOKKOS_INLINE_FUNCTION
834 void operator() (atomic_tag, SC& dest, const SC& src) const {
835 dest = src;
836 }
837
838 template<class SC>
839 KOKKOS_INLINE_FUNCTION
840 void operator() (nonatomic_tag, SC& dest, const SC& src) const {
841 dest = src;
842 }
843 };
844
845 struct AbsMaxOp {
846 template <class Scalar>
847 struct AbsMaxHelper{
848 Scalar value;
849
850 KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) {
851 auto lhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(value);
852 auto rhs_abs_value = Kokkos::ArithTraits<Scalar>::abs(rhs.value);
853 value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value;
854 return *this;
855 }
856
857 KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const {
858 AbsMaxHelper ret = *this;
859 ret += rhs;
860 return ret;
861 }
862 };
863
864 template <typename SC>
865 KOKKOS_INLINE_FUNCTION
866 void operator() (atomic_tag, SC& dst, const SC& src) const {
867 Kokkos::atomic_add(reinterpret_cast<AbsMaxHelper<SC>*>(&dst), AbsMaxHelper<SC>{src});
868 }
869
870 template <typename SC>
871 KOKKOS_INLINE_FUNCTION
872 void operator() (nonatomic_tag, SC& dst, const SC& src) const {
873 auto dst_abs_value = Kokkos::ArithTraits<SC>::abs(dst);
874 auto src_abs_value = Kokkos::ArithTraits<SC>::abs(src);
875 dst = dst_abs_value > src_abs_value ? dst_abs_value : src_abs_value;
876 }
877 };
878
879 template <typename ExecutionSpace,
880 typename DstView,
881 typename SrcView,
882 typename IdxView,
883 typename Op,
884 typename Enabled = void>
885 class UnpackArrayMultiColumn {
886 private:
887 static_assert (Kokkos::is_view<DstView>::value,
888 "DstView must be a Kokkos::View.");
889 static_assert (Kokkos::is_view<SrcView>::value,
890 "SrcView must be a Kokkos::View.");
891 static_assert (Kokkos::is_view<IdxView>::value,
892 "IdxView must be a Kokkos::View.");
893 static_assert (static_cast<int> (DstView::rank) == 2,
894 "DstView must be a rank-2 Kokkos::View.");
895 static_assert (static_cast<int> (SrcView::rank) == 1,
896 "SrcView must be a rank-1 Kokkos::View.");
897 static_assert (static_cast<int> (IdxView::rank) == 1,
898 "IdxView must be a rank-1 Kokkos::View.");
899
900 public:
901 typedef typename ExecutionSpace::execution_space execution_space;
902 typedef typename execution_space::size_type size_type;
903
904 private:
905 DstView dst;
906 SrcView src;
907 IdxView idx;
908 Op op;
909 size_t numCols;
910
911 public:
912 UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
913 const DstView& dst_,
914 const SrcView& src_,
915 const IdxView& idx_,
916 const Op& op_,
917 const size_t numCols_) :
918 dst (dst_),
919 src (src_),
920 idx (idx_),
921 op (op_),
922 numCols (numCols_)
923 {}
924
925 template<class TagType>
926 KOKKOS_INLINE_FUNCTION void
927 operator() (TagType tag, const size_type k) const
928 {
929 static_assert
930 (std::is_same<TagType, atomic_tag>::value ||
931 std::is_same<TagType, nonatomic_tag>::value,
932 "TagType must be atomic_tag or nonatomic_tag.");
933
934 const typename IdxView::value_type localRow = idx(k);
935 const size_t offset = k*numCols;
936 for (size_t j = 0; j < numCols; ++j) {
937 op (tag, dst(localRow, j), src(offset+j));
938 }
939 }
940
941 static void
942 unpack (const ExecutionSpace& execSpace,
943 const DstView& dst,
944 const SrcView& src,
945 const IdxView& idx,
946 const Op& op,
947 const size_t numCols,
948 const bool use_atomic_updates)
949 {
950 if (use_atomic_updates) {
951 using range_type =
952 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
953 Kokkos::parallel_for
954 ("Tpetra::MultiVector unpack const stride atomic",
955 range_type (0, idx.size ()),
956 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
957 }
958 else {
959 using range_type =
960 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
961 Kokkos::parallel_for
962 ("Tpetra::MultiVector unpack const stride nonatomic",
963 range_type (0, idx.size ()),
964 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
965 }
966 }
967 };
968
969 template <typename ExecutionSpace,
970 typename DstView,
971 typename SrcView,
972 typename IdxView,
973 typename Op,
974 typename SizeType = typename ExecutionSpace::execution_space::size_type,
975 typename Enabled = void>
976 class UnpackArrayMultiColumnWithBoundsCheck {
977 private:
978 static_assert (Kokkos::is_view<DstView>::value,
979 "DstView must be a Kokkos::View.");
980 static_assert (Kokkos::is_view<SrcView>::value,
981 "SrcView must be a Kokkos::View.");
982 static_assert (Kokkos::is_view<IdxView>::value,
983 "IdxView must be a Kokkos::View.");
984 static_assert (static_cast<int> (DstView::rank) == 2,
985 "DstView must be a rank-2 Kokkos::View.");
986 static_assert (static_cast<int> (SrcView::rank) == 1,
987 "SrcView must be a rank-1 Kokkos::View.");
988 static_assert (static_cast<int> (IdxView::rank) == 1,
989 "IdxView must be a rank-1 Kokkos::View.");
990 static_assert (std::is_integral<SizeType>::value,
991 "SizeType must be a built-in integer type.");
992
993 public:
994 using execution_space = typename ExecutionSpace::execution_space;
995 using size_type = SizeType;
996 using value_type = size_t;
997
998 private:
999 DstView dst;
1000 SrcView src;
1001 IdxView idx;
1002 Op op;
1003 size_type numCols;
1004
1005 public:
1006 UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
1007 const DstView& dst_,
1008 const SrcView& src_,
1009 const IdxView& idx_,
1010 const Op& op_,
1011 const size_type numCols_) :
1012 dst (dst_),
1013 src (src_),
1014 idx (idx_),
1015 op (op_),
1016 numCols (numCols_)
1017 {}
1018
1019 template<class TagType>
1020 KOKKOS_INLINE_FUNCTION void
1021 operator() (TagType tag,
1022 const size_type k,
1023 size_t& lclErrCount) const
1024 {
1025 static_assert
1026 (std::is_same<TagType, atomic_tag>::value ||
1027 std::is_same<TagType, nonatomic_tag>::value,
1028 "TagType must be atomic_tag or nonatomic_tag.");
1029 using index_type = typename IdxView::non_const_value_type;
1030
1031 const index_type lclRow = idx(k);
1032 if (lclRow < static_cast<index_type> (0) ||
1033 lclRow >= static_cast<index_type> (dst.extent (0))) {
1034 ++lclErrCount;
1035 }
1036 else {
1037 const size_type offset = k*numCols;
1038 for (size_type j = 0; j < numCols; ++j) {
1039 op (tag, dst(lclRow,j), src(offset+j));
1040 }
1041 }
1042 }
1043
1044 template<class TagType>
1045 KOKKOS_INLINE_FUNCTION void
1046 init (TagType, size_t& initialErrorCount) const {
1047 initialErrorCount = 0;
1048 }
1049
1050 template<class TagType>
1051 KOKKOS_INLINE_FUNCTION void
1052 join (TagType,
1053 size_t& dstErrorCount,
1054 const size_t& srcErrorCount) const
1055 {
1056 dstErrorCount += srcErrorCount;
1057 }
1058
1059 static void
1060 unpack (const ExecutionSpace& execSpace,
1061 const DstView& dst,
1062 const SrcView& src,
1063 const IdxView& idx,
1064 const Op& op,
1065 const size_type numCols,
1066 const bool use_atomic_updates)
1067 {
1068 using index_type = typename IdxView::non_const_value_type;
1069
1070 size_t errorCount = 0;
1071 if (use_atomic_updates) {
1072 using range_type =
1073 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1074 Kokkos::parallel_reduce
1075 ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1076 range_type (0, idx.size ()),
1077 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1078 idx, op, numCols),
1079 errorCount);
1080 }
1081 else {
1082 using range_type =
1083 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1084 Kokkos::parallel_reduce
1085 ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1086 range_type (0, idx.size ()),
1087 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1088 idx, op, numCols),
1089 errorCount);
1090 }
1091
1092 if (errorCount != 0) {
1093 // Go back and find the out-of-bounds entries in the index
1094 // array. Performance doesn't matter since we are already in
1095 // an error state, so we can do this sequentially, on host.
1096 auto idx_h = Kokkos::create_mirror_view (idx);
1097
1098 // DEEP_COPY REVIEW - NOT TESTED
1099 Kokkos::deep_copy (idx_h, idx);
1100
1101 std::vector<index_type> badIndices;
1102 const size_type numInds = idx_h.extent (0);
1103 for (size_type k = 0; k < numInds; ++k) {
1104 if (idx_h(k) < static_cast<index_type> (0) ||
1105 idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1106 badIndices.push_back (idx_h(k));
1107 }
1108 }
1109
1110 if (errorCount != badIndices.size ()) {
1111 std::ostringstream os;
1112 os << "MultiVector unpack kernel: errorCount = " << errorCount
1113 << " != badIndices.size() = " << badIndices.size ()
1114 << ". This should never happen. "
1115 "Please report this to the Tpetra developers.";
1116 throw std::logic_error (os.str ());
1117 }
1118
1119 std::ostringstream os;
1120 os << "MultiVector unpack kernel had " << badIndices.size ()
1121 << " out-of bounds index/ices. Here they are: [";
1122 for (size_t k = 0; k < badIndices.size (); ++k) {
1123 os << badIndices[k];
1124 if (k + 1 < badIndices.size ()) {
1125 os << ", ";
1126 }
1127 }
1128 os << "].";
1129 throw std::runtime_error (os.str ());
1130 }
1131 }
1132 };
1133
1134 template <typename ExecutionSpace,
1135 typename DstView,
1136 typename SrcView,
1137 typename IdxView,
1138 typename Op>
1139 void
1140 unpack_array_multi_column (const ExecutionSpace& execSpace,
1141 const DstView& dst,
1142 const SrcView& src,
1143 const IdxView& idx,
1144 const Op& op,
1145 const size_t numCols,
1146 const bool use_atomic_updates,
1147 const bool debug)
1148 {
1149 static_assert (Kokkos::is_view<DstView>::value,
1150 "DstView must be a Kokkos::View.");
1151 static_assert (Kokkos::is_view<SrcView>::value,
1152 "SrcView must be a Kokkos::View.");
1153 static_assert (Kokkos::is_view<IdxView>::value,
1154 "IdxView must be a Kokkos::View.");
1155 static_assert (static_cast<int> (DstView::rank) == 2,
1156 "DstView must be a rank-2 Kokkos::View.");
1157 static_assert (static_cast<int> (SrcView::rank) == 1,
1158 "SrcView must be a rank-1 Kokkos::View.");
1159 static_assert (static_cast<int> (IdxView::rank) == 1,
1160 "IdxView must be a rank-1 Kokkos::View.");
1161
1162 if (debug) {
1163 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1164 DstView, SrcView, IdxView, Op> impl_type;
1165 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1166 use_atomic_updates);
1167 }
1168 else {
1169 typedef UnpackArrayMultiColumn<ExecutionSpace,
1170 DstView, SrcView, IdxView, Op> impl_type;
1171 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1172 use_atomic_updates);
1173 }
1174 }
1175
1176 template <typename ExecutionSpace,
1177 typename DstView,
1178 typename SrcView,
1179 typename IdxView,
1180 typename ColView,
1181 typename Op,
1182 typename Enabled = void>
1183 class UnpackArrayMultiColumnVariableStride {
1184 private:
1185 static_assert (Kokkos::is_view<DstView>::value,
1186 "DstView must be a Kokkos::View.");
1187 static_assert (Kokkos::is_view<SrcView>::value,
1188 "SrcView must be a Kokkos::View.");
1189 static_assert (Kokkos::is_view<IdxView>::value,
1190 "IdxView must be a Kokkos::View.");
1191 static_assert (Kokkos::is_view<ColView>::value,
1192 "ColView must be a Kokkos::View.");
1193 static_assert (static_cast<int> (DstView::rank) == 2,
1194 "DstView must be a rank-2 Kokkos::View.");
1195 static_assert (static_cast<int> (SrcView::rank) == 1,
1196 "SrcView must be a rank-1 Kokkos::View.");
1197 static_assert (static_cast<int> (IdxView::rank) == 1,
1198 "IdxView must be a rank-1 Kokkos::View.");
1199 static_assert (static_cast<int> (ColView::rank) == 1,
1200 "ColView must be a rank-1 Kokkos::View.");
1201
1202 public:
1203 using execution_space = typename ExecutionSpace::execution_space;
1204 using size_type = typename execution_space::size_type;
1205
1206 private:
1207 DstView dst;
1208 SrcView src;
1209 IdxView idx;
1210 ColView col;
1211 Op op;
1212 size_t numCols;
1213
1214 public:
1215 UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1216 const DstView& dst_,
1217 const SrcView& src_,
1218 const IdxView& idx_,
1219 const ColView& col_,
1220 const Op& op_,
1221 const size_t numCols_) :
1222 dst (dst_),
1223 src (src_),
1224 idx (idx_),
1225 col (col_),
1226 op (op_),
1227 numCols (numCols_)
1228 {}
1229
1230 template<class TagType>
1231 KOKKOS_INLINE_FUNCTION void
1232 operator() (TagType tag, const size_type k) const
1233 {
1234 static_assert
1235 (std::is_same<TagType, atomic_tag>::value ||
1236 std::is_same<TagType, nonatomic_tag>::value,
1237 "TagType must be atomic_tag or nonatomic_tag.");
1238
1239 const typename IdxView::value_type localRow = idx(k);
1240 const size_t offset = k*numCols;
1241 for (size_t j = 0; j < numCols; ++j) {
1242 op (tag, dst(localRow, col(j)), src(offset+j));
1243 }
1244 }
1245
1246 static void
1247 unpack (const ExecutionSpace& execSpace,
1248 const DstView& dst,
1249 const SrcView& src,
1250 const IdxView& idx,
1251 const ColView& col,
1252 const Op& op,
1253 const size_t numCols,
1254 const bool use_atomic_updates)
1255 {
1256 if (use_atomic_updates) {
1257 using range_type =
1258 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1259 Kokkos::parallel_for
1260 ("Tpetra::MultiVector unpack var stride atomic",
1261 range_type (0, idx.size ()),
1262 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1263 idx, col, op, numCols));
1264 }
1265 else {
1266 using range_type =
1267 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1268 Kokkos::parallel_for
1269 ("Tpetra::MultiVector unpack var stride nonatomic",
1270 range_type (0, idx.size ()),
1271 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1272 idx, col, op, numCols));
1273 }
1274 }
1275 };
1276
1277 template <typename ExecutionSpace,
1278 typename DstView,
1279 typename SrcView,
1280 typename IdxView,
1281 typename ColView,
1282 typename Op,
1283 typename SizeType = typename ExecutionSpace::execution_space::size_type,
1284 typename Enabled = void>
1285 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1286 private:
1287 static_assert (Kokkos::is_view<DstView>::value,
1288 "DstView must be a Kokkos::View.");
1289 static_assert (Kokkos::is_view<SrcView>::value,
1290 "SrcView must be a Kokkos::View.");
1291 static_assert (Kokkos::is_view<IdxView>::value,
1292 "IdxView must be a Kokkos::View.");
1293 static_assert (Kokkos::is_view<ColView>::value,
1294 "ColView must be a Kokkos::View.");
1295 static_assert (static_cast<int> (DstView::rank) == 2,
1296 "DstView must be a rank-2 Kokkos::View.");
1297 static_assert (static_cast<int> (SrcView::rank) == 1,
1298 "SrcView must be a rank-1 Kokkos::View.");
1299 static_assert (static_cast<int> (IdxView::rank) == 1,
1300 "IdxView must be a rank-1 Kokkos::View.");
1301 static_assert (static_cast<int> (ColView::rank) == 1,
1302 "ColView must be a rank-1 Kokkos::View.");
1303 static_assert (std::is_integral<SizeType>::value,
1304 "SizeType must be a built-in integer type.");
1305
1306 public:
1307 using execution_space = typename ExecutionSpace::execution_space;
1308 using size_type = SizeType;
1309 using value_type = size_t;
1310
1311 private:
1312 DstView dst;
1313 SrcView src;
1314 IdxView idx;
1315 ColView col;
1316 Op op;
1317 size_type numCols;
1318
1319 public:
1320 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1321 (const ExecutionSpace& /* execSpace */,
1322 const DstView& dst_,
1323 const SrcView& src_,
1324 const IdxView& idx_,
1325 const ColView& col_,
1326 const Op& op_,
1327 const size_t numCols_) :
1328 dst (dst_),
1329 src (src_),
1330 idx (idx_),
1331 col (col_),
1332 op (op_),
1333 numCols (numCols_)
1334 {}
1335
1336 template<class TagType>
1337 KOKKOS_INLINE_FUNCTION void
1338 operator() (TagType tag,
1339 const size_type k,
1340 value_type& lclErrorCount) const
1341 {
1342 static_assert
1343 (std::is_same<TagType, atomic_tag>::value ||
1344 std::is_same<TagType, nonatomic_tag>::value,
1345 "TagType must be atomic_tag or nonatomic_tag.");
1346 using row_index_type = typename IdxView::non_const_value_type;
1347 using col_index_type = typename ColView::non_const_value_type;
1348
1349 const row_index_type lclRow = idx(k);
1350 if (lclRow < static_cast<row_index_type> (0) ||
1351 lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1352 ++lclErrorCount;
1353 }
1354 else {
1355 const size_type offset = k * numCols;
1356 for (size_type j = 0; j < numCols; ++j) {
1357 const col_index_type lclCol = col(j);
1358 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1359 ++lclErrorCount;
1360 }
1361 else { // all indices are valid; apply the op
1362 op (tag, dst(lclRow, col(j)), src(offset+j));
1363 }
1364 }
1365 }
1366 }
1367
1368 KOKKOS_INLINE_FUNCTION void
1369 init (value_type& initialErrorCount) const {
1370 initialErrorCount = 0;
1371 }
1372
1373 KOKKOS_INLINE_FUNCTION void
1374 join (value_type& dstErrorCount,
1375 const value_type& srcErrorCount) const
1376 {
1377 dstErrorCount += srcErrorCount;
1378 }
1379
1380 static void
1381 unpack (const ExecutionSpace& execSpace,
1382 const DstView& dst,
1383 const SrcView& src,
1384 const IdxView& idx,
1385 const ColView& col,
1386 const Op& op,
1387 const size_type numCols,
1388 const bool use_atomic_updates)
1389 {
1390 using row_index_type = typename IdxView::non_const_value_type;
1391 using col_index_type = typename ColView::non_const_value_type;
1392
1393 size_t errorCount = 0;
1394 if (use_atomic_updates) {
1395 using range_type =
1396 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1397 Kokkos::parallel_reduce
1398 ("Tpetra::MultiVector unpack var stride atomic debug only",
1399 range_type (0, idx.size ()),
1400 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1401 (execSpace, dst, src, idx, col, op, numCols),
1402 errorCount);
1403 }
1404 else {
1405 using range_type =
1406 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1407 Kokkos::parallel_reduce
1408 ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1409 range_type (0, idx.size ()),
1410 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1411 (execSpace, dst, src, idx, col, op, numCols),
1412 errorCount);
1413 }
1414
1415 if (errorCount != 0) {
1416 constexpr size_t maxNumBadIndicesToPrint = 100;
1417
1418 std::ostringstream os; // for error reporting
1419 os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1420 "found " << errorCount
1421 << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1422
1423 // Go back and find any out-of-bounds entries in the array of
1424 // row indices. Performance doesn't matter since we are
1425 // already in an error state, so we can do this sequentially,
1426 // on host.
1427 auto idx_h = Kokkos::create_mirror_view (idx);
1428
1429 // DEEP_COPY REVIEW - NOT TESTED
1430 Kokkos::deep_copy (idx_h, idx);
1431
1432 std::vector<row_index_type> badRows;
1433 const size_type numRowInds = idx_h.extent (0);
1434 for (size_type k = 0; k < numRowInds; ++k) {
1435 if (idx_h(k) < static_cast<row_index_type> (0) ||
1436 idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1437 badRows.push_back (idx_h(k));
1438 }
1439 }
1440
1441 if (badRows.size () != 0) {
1442 os << badRows.size () << " out-of-bounds row ind"
1443 << (badRows.size () != size_t (1) ? "ices" : "ex");
1444 if (badRows.size () <= maxNumBadIndicesToPrint) {
1445 os << ": [";
1446 for (size_t k = 0; k < badRows.size (); ++k) {
1447 os << badRows[k];
1448 if (k + 1 < badRows.size ()) {
1449 os << ", ";
1450 }
1451 }
1452 os << "]. ";
1453 }
1454 else {
1455 os << ". ";
1456 }
1457 }
1458 else {
1459 os << "No out-of-bounds row indices. ";
1460 }
1461
1462 // Go back and find any out-of-bounds entries in the array
1463 // of column indices.
1464 auto col_h = Kokkos::create_mirror_view (col);
1465
1466 // DEEP_COPY REVIEW - NOT TESTED
1467 Kokkos::deep_copy (col_h, col);
1468
1469 std::vector<col_index_type> badCols;
1470 const size_type numColInds = col_h.extent (0);
1471 for (size_type k = 0; k < numColInds; ++k) {
1472 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1473 badCols.push_back (col_h(k));
1474 }
1475 }
1476
1477 if (badCols.size () != 0) {
1478 os << badCols.size () << " out-of-bounds column ind"
1479 << (badCols.size () != size_t (1) ? "ices" : "ex");
1480 if (badCols.size () <= maxNumBadIndicesToPrint) {
1481 for (size_t k = 0; k < badCols.size (); ++k) {
1482 os << ": [";
1483 os << badCols[k];
1484 if (k + 1 < badCols.size ()) {
1485 os << ", ";
1486 }
1487 }
1488 os << "]. ";
1489 }
1490 else {
1491 os << ". ";
1492 }
1493 }
1494 else {
1495 os << "No out-of-bounds column indices. ";
1496 }
1497
1498 TEUCHOS_TEST_FOR_EXCEPTION
1499 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1500 std::logic_error, "Tpetra::MultiVector variable stride unpack "
1501 "kernel reports errorCount=" << errorCount << ", but we failed "
1502 "to find any bad rows or columns. This should never happen. "
1503 "Please report this to the Tpetra developers.");
1504
1505 throw std::runtime_error (os.str ());
1506 } // hasErr
1507 }
1508 };
1509
1510 template <typename ExecutionSpace,
1511 typename DstView,
1512 typename SrcView,
1513 typename IdxView,
1514 typename ColView,
1515 typename Op>
1516 void
1517 unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1518 const DstView& dst,
1519 const SrcView& src,
1520 const IdxView& idx,
1521 const ColView& col,
1522 const Op& op,
1523 const size_t numCols,
1524 const bool use_atomic_updates,
1525 const bool debug)
1526 {
1527 static_assert (Kokkos::is_view<DstView>::value,
1528 "DstView must be a Kokkos::View.");
1529 static_assert (Kokkos::is_view<SrcView>::value,
1530 "SrcView must be a Kokkos::View.");
1531 static_assert (Kokkos::is_view<IdxView>::value,
1532 "IdxView must be a Kokkos::View.");
1533 static_assert (Kokkos::is_view<ColView>::value,
1534 "ColView must be a Kokkos::View.");
1535 static_assert (static_cast<int> (DstView::rank) == 2,
1536 "DstView must be a rank-2 Kokkos::View.");
1537 static_assert (static_cast<int> (SrcView::rank) == 1,
1538 "SrcView must be a rank-1 Kokkos::View.");
1539 static_assert (static_cast<int> (IdxView::rank) == 1,
1540 "IdxView must be a rank-1 Kokkos::View.");
1541 static_assert (static_cast<int> (ColView::rank) == 1,
1542 "ColView must be a rank-1 Kokkos::View.");
1543
1544 if (debug) {
1545 using impl_type =
1546 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1547 DstView, SrcView, IdxView, ColView, Op>;
1548 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1549 use_atomic_updates);
1550 }
1551 else {
1552 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1553 DstView, SrcView, IdxView, ColView, Op>;
1554 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1555 use_atomic_updates);
1556 }
1557 }
1558
1559 template <typename DstView, typename SrcView,
1560 typename DstIdxView, typename SrcIdxView, typename Op,
1561 typename Enabled = void>
1562 struct PermuteArrayMultiColumn {
1563 using size_type = typename DstView::size_type;
1564
1565 DstView dst;
1566 SrcView src;
1567 DstIdxView dst_idx;
1568 SrcIdxView src_idx;
1569 size_t numCols;
1570 Op op;
1571
1572 PermuteArrayMultiColumn (const DstView& dst_,
1573 const SrcView& src_,
1574 const DstIdxView& dst_idx_,
1575 const SrcIdxView& src_idx_,
1576 const size_t numCols_,
1577 const Op& op_) :
1578 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1579 numCols(numCols_), op(op_) {}
1580
1581 KOKKOS_INLINE_FUNCTION void
1582 operator() (const size_type k) const {
1583 const typename DstIdxView::value_type toRow = dst_idx(k);
1584 const typename SrcIdxView::value_type fromRow = src_idx(k);
1585 nonatomic_tag tag; // permute does not need atomics
1586 for (size_t j = 0; j < numCols; ++j) {
1587 op(tag, dst(toRow, j), src(fromRow, j));
1588 }
1589 }
1590
1591 template <typename ExecutionSpace>
1592 static void
1593 permute (const ExecutionSpace &space,
1594 const DstView& dst,
1595 const SrcView& src,
1596 const DstIdxView& dst_idx,
1597 const SrcIdxView& src_idx,
1598 const size_t numCols,
1599 const Op& op)
1600 {
1601 using range_type =
1602 Kokkos::RangePolicy<ExecutionSpace, size_type>;
1603 // permute does not need atomics for Op
1604 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1605 Kokkos::parallel_for
1606 ("Tpetra::MultiVector permute multicol const stride",
1607 range_type (space, 0, n),
1608 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1609 }
1610 };
1611
1612// clang-format on
1613// To do: Add enable_if<> restrictions on DstView::rank == 1,
1614// SrcView::rank == 2
1615template <typename ExecutionSpace, typename DstView, typename SrcView,
1616 typename DstIdxView, typename SrcIdxView, typename Op>
1617void permute_array_multi_column(const ExecutionSpace &space, const DstView &dst,
1618 const SrcView &src, const DstIdxView &dst_idx,
1619 const SrcIdxView &src_idx, size_t numCols,
1620 const Op &op) {
1621 PermuteArrayMultiColumn<DstView, SrcView, DstIdxView, SrcIdxView,
1622 Op>::permute(space, dst, src, dst_idx, src_idx,
1623 numCols, op);
1624}
1625// clang-format off
1626
1627 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1628 // SrcView::rank == 2
1629 template <typename DstView, typename SrcView,
1630 typename DstIdxView, typename SrcIdxView, typename Op>
1631 void permute_array_multi_column(const DstView& dst,
1632 const SrcView& src,
1633 const DstIdxView& dst_idx,
1634 const SrcIdxView& src_idx,
1635 size_t numCols,
1636 const Op& op) {
1637 using execution_space = typename DstView::execution_space;
1638 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1639 execution_space(), dst, src, dst_idx, src_idx, numCols, op);
1640 }
1641
1642 template <typename DstView, typename SrcView,
1643 typename DstIdxView, typename SrcIdxView,
1644 typename DstColView, typename SrcColView, typename Op,
1645 typename Enabled = void>
1646 struct PermuteArrayMultiColumnVariableStride {
1647 using size_type = typename DstView::size_type;
1648
1649 DstView dst;
1650 SrcView src;
1651 DstIdxView dst_idx;
1652 SrcIdxView src_idx;
1653 DstColView dst_col;
1654 SrcColView src_col;
1655 size_t numCols;
1656 Op op;
1657
1658 PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1659 const SrcView& src_,
1660 const DstIdxView& dst_idx_,
1661 const SrcIdxView& src_idx_,
1662 const DstColView& dst_col_,
1663 const SrcColView& src_col_,
1664 const size_t numCols_,
1665 const Op& op_) :
1666 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1667 dst_col(dst_col_), src_col(src_col_),
1668 numCols(numCols_), op(op_) {}
1669
1670 KOKKOS_INLINE_FUNCTION void
1671 operator() (const size_type k) const {
1672 const typename DstIdxView::value_type toRow = dst_idx(k);
1673 const typename SrcIdxView::value_type fromRow = src_idx(k);
1674 const nonatomic_tag tag; // permute does not need atomics
1675 for (size_t j = 0; j < numCols; ++j) {
1676 op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1677 }
1678 }
1679
1680 template <typename ExecutionSpace>
1681 static void
1682 permute ( const ExecutionSpace &space,
1683 const DstView& dst,
1684 const SrcView& src,
1685 const DstIdxView& dst_idx,
1686 const SrcIdxView& src_idx,
1687 const DstColView& dst_col,
1688 const SrcColView& src_col,
1689 const size_t numCols,
1690 const Op& op)
1691 {
1692
1693 static_assert(Kokkos::SpaceAccessibility<
1694 ExecutionSpace, typename DstView::memory_space>::accessible,
1695 "ExecutionSpace must be able to access DstView");
1696
1697 using range_type = Kokkos::RangePolicy<ExecutionSpace, size_type>;
1698 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1699 Kokkos::parallel_for
1700 ("Tpetra::MultiVector permute multicol var stride",
1701 range_type (space, 0, n),
1702 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1703 dst_col, src_col, numCols, op));
1704 }
1705 };
1706
1707// clang-format on
1708// To do: Add enable_if<> restrictions on DstView::rank == 1,
1709// SrcView::rank == 2
1710template <typename ExecutionSpace, typename DstView, typename SrcView,
1711 typename DstIdxView, typename SrcIdxView, typename DstColView,
1712 typename SrcColView, typename Op>
1713void permute_array_multi_column_variable_stride(
1714 const ExecutionSpace &space, const DstView &dst, const SrcView &src,
1715 const DstIdxView &dst_idx, const SrcIdxView &src_idx,
1716 const DstColView &dst_col, const SrcColView &src_col, size_t numCols,
1717 const Op &op) {
1718 PermuteArrayMultiColumnVariableStride<DstView, SrcView, DstIdxView,
1719 SrcIdxView, DstColView, SrcColView,
1720 Op>::permute(space, dst, src, dst_idx,
1721 src_idx, dst_col, src_col,
1722 numCols, op);
1723}
1724// clang-format off
1725
1726 // To do: Add enable_if<> restrictions on DstView::rank == 1,
1727 // SrcView::rank == 2
1728 template <typename DstView, typename SrcView,
1729 typename DstIdxView, typename SrcIdxView,
1730 typename DstColView, typename SrcColView, typename Op>
1731 void permute_array_multi_column_variable_stride(const DstView& dst,
1732 const SrcView& src,
1733 const DstIdxView& dst_idx,
1734 const SrcIdxView& src_idx,
1735 const DstColView& dst_col,
1736 const SrcColView& src_col,
1737 size_t numCols,
1738 const Op& op) {
1739 using execution_space = typename DstView::execution_space;
1740 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1741 DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1742 execution_space(), dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1743 }
1744
1745} // Details namespace
1746} // KokkosRefactor namespace
1747} // Tpetra namespace
1748
1749#endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...