11#ifndef TPETRA_MULTIVECTOR_DEF_HPP
12#define TPETRA_MULTIVECTOR_DEF_HPP
24#include "Tpetra_Vector.hpp"
36#include "Tpetra_Details_Random.hpp"
37#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
38# include "Teuchos_SerialDenseMatrix.hpp"
40#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
41#include "KokkosCompat_View.hpp"
42#include "KokkosBlas.hpp"
43#include "KokkosKernels_Utils.hpp"
44#include "Kokkos_Random.hpp"
45#include "Kokkos_ArithTraits.hpp"
49#ifdef HAVE_TPETRA_INST_FLOAT128
52 template<
class Generator>
53 struct rand<Generator, __float128> {
54 static KOKKOS_INLINE_FUNCTION __float128 max ()
56 return static_cast<__float128
> (1.0);
58 static KOKKOS_INLINE_FUNCTION __float128
63 const __float128 scalingFactor =
64 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
65 static_cast<__float128
> (2.0);
66 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
67 const __float128 lowerOrderTerm =
68 static_cast<__float128
> (gen.drand ()) * scalingFactor;
69 return higherOrderTerm + lowerOrderTerm;
71 static KOKKOS_INLINE_FUNCTION __float128
72 draw (Generator& gen,
const __float128& range)
75 const __float128 scalingFactor =
76 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
77 static_cast<__float128
> (2.0);
78 const __float128 higherOrderTerm =
79 static_cast<__float128
> (gen.drand (range));
80 const __float128 lowerOrderTerm =
81 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
82 return higherOrderTerm + lowerOrderTerm;
84 static KOKKOS_INLINE_FUNCTION __float128
85 draw (Generator& gen,
const __float128& start,
const __float128& end)
88 const __float128 scalingFactor =
89 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
90 static_cast<__float128
> (2.0);
91 const __float128 higherOrderTerm =
92 static_cast<__float128
> (gen.drand (start, end));
93 const __float128 lowerOrderTerm =
94 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
95 return higherOrderTerm + lowerOrderTerm;
117 template<
class ST,
class LO,
class GO,
class NT>
118 typename Tpetra::MultiVector<ST, LO, GO, NT>::wrapped_dual_view_type
119 allocDualView (
const size_t lclNumRows,
120 const size_t numCols,
121 const bool zeroOut =
true,
122 const bool allowPadding =
false)
125 using Kokkos::AllowPadding;
126 using Kokkos::view_alloc;
127 using Kokkos::WithoutInitializing;
129 typedef typename Tpetra::MultiVector<ST, LO, GO, NT>::wrapped_dual_view_type wrapped_dual_view_type;
130 typedef typename dual_view_type::t_dev dev_view_type;
136 const std::string label (
"MV::DualView");
137 const bool debug = Behavior::debug ();
147 dev_view_type d_view;
150 d_view = dev_view_type (view_alloc (label, AllowPadding),
151 lclNumRows, numCols);
154 d_view = dev_view_type (view_alloc (label),
155 lclNumRows, numCols);
160 d_view = dev_view_type (view_alloc (label,
163 lclNumRows, numCols);
166 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
167 lclNumRows, numCols);
178 const ST nan = Kokkos::ArithTraits<ST>::nan ();
179 KokkosBlas::fill (d_view, nan);
183 TEUCHOS_TEST_FOR_EXCEPTION
184 (
static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
185 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
186 "allocDualView: d_view's dimensions actual dimensions do not match "
187 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
188 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
189 <<
". Please report this bug to the Tpetra developers.");
192 return wrapped_dual_view_type(d_view);
199 template<
class T,
class ExecSpace>
200 struct MakeUnmanagedView {
210 typedef typename std::conditional<
211 Kokkos::SpaceAccessibility<
212 typename ExecSpace::memory_space,
213 Kokkos::HostSpace>::accessible,
214 typename ExecSpace::device_type,
215 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
216 typedef Kokkos::LayoutLeft array_layout;
217 typedef Kokkos::View<T*, array_layout, host_exec_space,
218 Kokkos::MemoryUnmanaged> view_type;
220 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
222 const size_t numEnt =
static_cast<size_t> (x_in.size ());
226 return view_type (x_in.getRawPtr (), numEnt);
232 template<
class WrappedDualViewType>
234 takeSubview (
const WrappedDualViewType& X,
235 const std::pair<size_t, size_t>& rowRng,
236 const Kokkos::ALL_t& colRng)
240 return WrappedDualViewType(X,rowRng,colRng);
243 template<
class WrappedDualViewType>
245 takeSubview (
const WrappedDualViewType& X,
246 const Kokkos::ALL_t& rowRng,
247 const std::pair<size_t, size_t>& colRng)
249 using DualViewType =
typename WrappedDualViewType::DVT;
252 if (X.extent (0) == 0 && X.extent (1) != 0) {
253 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
256 return WrappedDualViewType(X,rowRng,colRng);
260 template<
class WrappedDualViewType>
262 takeSubview (
const WrappedDualViewType& X,
263 const std::pair<size_t, size_t>& rowRng,
264 const std::pair<size_t, size_t>& colRng)
266 using DualViewType =
typename WrappedDualViewType::DVT;
276 if (X.extent (0) == 0 && X.extent (1) != 0) {
277 return WrappedDualViewType(DualViewType (
"MV::DualView", 0, colRng.second - colRng.first));
280 return WrappedDualViewType(X,rowRng,colRng);
284 template<
class WrappedOrNotDualViewType>
286 getDualViewStride (
const WrappedOrNotDualViewType& dv)
292 size_t strides[WrappedOrNotDualViewType::t_dev::rank+1];
294 const size_t LDA = strides[1];
295 const size_t numRows = dv.extent (0);
298 return (numRows == 0) ? size_t (1) : numRows;
305 template<
class ViewType>
307 getViewStride (
const ViewType& view)
309 const size_t LDA = view.stride (1);
310 const size_t numRows = view.extent (0);
313 return (numRows == 0) ? size_t (1) : numRows;
320 template <
class impl_scalar_type,
class buffer_device_type>
323 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports
326 if (! imports.need_sync_device ()) {
331 size_t localLengthThreshold =
333 return imports.extent(0) <= localLengthThreshold;
338 template <
class SC,
class LO,
class GO,
class NT>
340 runKernelOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
342 if (! X.need_sync_device ()) {
347 size_t localLengthThreshold =
349 return X.getLocalLength () <= localLengthThreshold;
353 template <
class SC,
class LO,
class GO,
class NT>
355 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
357 const ::Tpetra::Details::EWhichNorm whichNorm)
362 using val_type =
typename MV::impl_scalar_type;
363 using mag_type =
typename MV::mag_type;
364 using dual_view_type =
typename MV::dual_view_type;
367 auto comm = map.is_null () ? nullptr : map->getComm ().getRawPtr ();
368 auto whichVecs = getMultiVectorWhichVectors (X);
372 const bool runOnHost = runKernelOnHost (X);
374 using view_type =
typename dual_view_type::t_host;
375 using array_layout =
typename view_type::array_layout;
376 using device_type =
typename view_type::device_type;
380 mag_type> (norms, X_lcl, whichNorm, whichVecs,
381 isConstantStride, isDistributed, comm);
384 using view_type =
typename dual_view_type::t_dev;
385 using array_layout =
typename view_type::array_layout;
386 using device_type =
typename view_type::device_type;
390 mag_type> (norms, X_lcl, whichNorm, whichVecs,
391 isConstantStride, isDistributed, comm);
400 template <
typename DstView,
typename SrcView>
401 struct AddAssignFunctor {
404 AddAssignFunctor(DstView &tgt_, SrcView &src_) : tgt(tgt_), src(src_) {}
406 KOKKOS_INLINE_FUNCTION
void
407 operator () (
const size_t k)
const { tgt(k) += src(k); }
414 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
418 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
421 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
424 base_type (Teuchos::rcp (new
map_type ()))
427 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
429 MultiVector (
const Teuchos::RCP<const map_type>& map,
430 const size_t numVecs,
431 const bool zeroOut) :
437 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
440 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
443 const Teuchos::DataAccess copyOrView) :
449 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
450 "const Teuchos::DataAccess): ";
454 if (copyOrView == Teuchos::Copy) {
458 this->
view_ = cpy.view_;
461 else if (copyOrView == Teuchos::View) {
464 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
465 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
466 "invalid value " << copyOrView <<
". Valid values include "
467 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
468 << Teuchos::View <<
".");
472 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
474 MultiVector (
const Teuchos::RCP<const map_type>& map,
477 view_ (wrapped_dual_view_type(view))
479 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
480 const size_t lclNumRows_map = map.is_null () ?
size_t (0) :
481 map->getLocalNumElements ();
482 const size_t lclNumRows_view = view.extent (0);
483 const size_t LDA = getDualViewStride (
view_);
485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
486 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
487 std::invalid_argument,
"Kokkos::DualView does not match Map. "
488 "map->getLocalNumElements() = " << lclNumRows_map
489 <<
", view.extent(0) = " << lclNumRows_view
490 <<
", and getStride() = " << LDA <<
".");
493 const bool debug = Behavior::debug ();
495 using ::Tpetra::Details::checkGlobalDualViewValidity;
496 std::ostringstream gblErrStrm;
497 const bool verbose = Behavior::verbose ();
498 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
499 const bool gblValid =
500 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
502 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503 (! gblValid, std::runtime_error, gblErrStrm.str ());
508 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
510 MultiVector (
const Teuchos::RCP<const map_type>& map,
511 const wrapped_dual_view_type& view) :
515 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
516 const size_t lclNumRows_map = map.is_null () ?
size_t (0) :
517 map->getLocalNumElements ();
518 const size_t lclNumRows_view = view.extent (0);
519 const size_t LDA = getDualViewStride (view);
521 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
522 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
523 std::invalid_argument,
"Kokkos::DualView does not match Map. "
524 "map->getLocalNumElements() = " << lclNumRows_map
525 <<
", view.extent(0) = " << lclNumRows_view
526 <<
", and getStride() = " << LDA <<
".");
529 const bool debug = Behavior::debug ();
531 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
532 std::ostringstream gblErrStrm;
533 const bool verbose = Behavior::verbose ();
534 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
535 const bool gblValid =
536 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
538 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
539 (! gblValid, std::runtime_error, gblErrStrm.str ());
545 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
547 MultiVector (
const Teuchos::RCP<const map_type>& map,
548 const typename dual_view_type::t_dev& d_view) :
551 using Teuchos::ArrayRCP;
553 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
557 const size_t LDA = getViewStride (d_view);
558 const size_t lclNumRows = map->getLocalNumElements ();
559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
560 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
561 "Kokkos::View. map->getLocalNumElements() = " << lclNumRows
562 <<
", View's column stride = " << LDA
563 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
565 auto h_view = Kokkos::create_mirror_view (d_view);
567 view_ = wrapped_dual_view_type(dual_view);
570 const bool debug = Behavior::debug ();
572 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
573 std::ostringstream gblErrStrm;
574 const bool verbose = Behavior::verbose ();
575 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
576 const bool gblValid =
577 checkGlobalWrappedDualViewValidity (&gblErrStrm,
view_, verbose,
579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
580 (! gblValid, std::runtime_error, gblErrStrm.str ());
585 dual_view.modify_device ();
588 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
590 MultiVector (
const Teuchos::RCP<const map_type>& map,
594 view_ (wrapped_dual_view_type(view,origView))
596 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
598 const size_t LDA = getDualViewStride (origView);
600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
601 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
602 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
603 <<
". This may also mean that the input origView's first dimension (number "
604 "of rows = " << origView.extent (0) <<
") does not not match the number "
605 "of entries in the Map on the calling process.");
608 const bool debug = Behavior::debug ();
610 using ::Tpetra::Details::checkGlobalDualViewValidity;
611 std::ostringstream gblErrStrm;
612 const bool verbose = Behavior::verbose ();
613 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
614 const bool gblValid_0 =
615 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
617 const bool gblValid_1 =
618 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
620 const bool gblValid = gblValid_0 && gblValid_1;
621 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
622 (! gblValid, std::runtime_error, gblErrStrm.str ());
627 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
629 MultiVector (
const Teuchos::RCP<const map_type>& map,
631 const Teuchos::ArrayView<const size_t>& whichVectors) :
637 using Kokkos::subview;
638 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
641 const bool debug = Behavior::debug ();
643 using ::Tpetra::Details::checkGlobalDualViewValidity;
644 std::ostringstream gblErrStrm;
645 const bool verbose = Behavior::verbose ();
646 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
647 const bool gblValid =
648 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
651 (! gblValid, std::runtime_error, gblErrStrm.str ());
654 const size_t lclNumRows = map.is_null () ? size_t (0) :
655 map->getLocalNumElements ();
662 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
663 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
664 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
665 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
666 if (whichVectors.size () != 0) {
667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
668 view.extent (1) != 0 && view.extent (1) == 0,
669 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
670 " = " << whichVectors.size () <<
" > 0.");
671 size_t maxColInd = 0;
672 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
673 for (size_type k = 0; k < whichVectors.size (); ++k) {
674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
675 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
676 std::invalid_argument,
"whichVectors[" << k <<
"] = "
677 "Teuchos::OrdinalTraits<size_t>::invalid().");
678 maxColInd = std::max (maxColInd, whichVectors[k]);
680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
681 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
682 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
683 <<
" <= max(whichVectors) = " << maxColInd <<
".");
688 const size_t LDA = getDualViewStride (view);
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
690 (LDA < lclNumRows, std::invalid_argument,
691 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
693 if (whichVectors.size () == 1) {
703 const std::pair<size_t, size_t> colRng (whichVectors[0],
704 whichVectors[0] + 1);
711 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
713 MultiVector (
const Teuchos::RCP<const map_type>& map,
714 const wrapped_dual_view_type& view,
715 const Teuchos::ArrayView<const size_t>& whichVectors) :
721 using Kokkos::subview;
722 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
725 const bool debug = Behavior::debug ();
727 using ::Tpetra::Details::checkGlobalWrappedDualViewValidity;
728 std::ostringstream gblErrStrm;
729 const bool verbose = Behavior::verbose ();
730 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
731 const bool gblValid =
732 checkGlobalWrappedDualViewValidity (&gblErrStrm, view, verbose,
734 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
735 (! gblValid, std::runtime_error, gblErrStrm.str ());
738 const size_t lclNumRows = map.is_null () ? size_t (0) :
739 map->getLocalNumElements ();
746 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
747 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
748 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
749 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
750 if (whichVectors.size () != 0) {
751 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
752 view.extent (1) != 0 && view.extent (1) == 0,
753 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
754 " = " << whichVectors.size () <<
" > 0.");
755 size_t maxColInd = 0;
756 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
757 for (size_type k = 0; k < whichVectors.size (); ++k) {
758 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
759 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
760 std::invalid_argument,
"whichVectors[" << k <<
"] = "
761 "Teuchos::OrdinalTraits<size_t>::invalid().");
762 maxColInd = std::max (maxColInd, whichVectors[k]);
764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
765 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
766 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
767 <<
" <= max(whichVectors) = " << maxColInd <<
".");
772 const size_t LDA = getDualViewStride (view);
773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
774 (LDA < lclNumRows, std::invalid_argument,
775 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
777 if (whichVectors.size () == 1) {
787 const std::pair<size_t, size_t> colRng (whichVectors[0],
788 whichVectors[0] + 1);
796 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
798 MultiVector (
const Teuchos::RCP<const map_type>& map,
801 const Teuchos::ArrayView<const size_t>& whichVectors) :
807 using Kokkos::subview;
808 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
811 const bool debug = Behavior::debug ();
813 using ::Tpetra::Details::checkGlobalDualViewValidity;
814 std::ostringstream gblErrStrm;
815 const bool verbose = Behavior::verbose ();
816 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
817 const bool gblValid_0 =
818 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
820 const bool gblValid_1 =
821 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
823 const bool gblValid = gblValid_0 && gblValid_1;
824 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
825 (! gblValid, std::runtime_error, gblErrStrm.str ());
835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
836 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
837 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
838 <<
" < map->getLocalNumElements() = " << lclNumRows <<
".");
839 if (whichVectors.size () != 0) {
840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
841 view.extent (1) != 0 && view.extent (1) == 0,
842 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
843 " = " << whichVectors.size () <<
" > 0.");
844 size_t maxColInd = 0;
845 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
846 for (size_type k = 0; k < whichVectors.size (); ++k) {
847 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
849 std::invalid_argument,
"whichVectors[" << k <<
"] = "
850 "Teuchos::OrdinalTraits<size_t>::invalid().");
851 maxColInd = std::max (maxColInd, whichVectors[k]);
853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
854 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
855 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
856 <<
" <= max(whichVectors) = " << maxColInd <<
".");
861 const size_t LDA = getDualViewStride (origView);
862 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
863 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
864 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
865 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
866 <<
", origView.stride(1) = " << origView.view_device().stride(1) <<
".");
868 if (whichVectors.size () == 1) {
877 const std::pair<size_t, size_t> colRng (whichVectors[0],
878 whichVectors[0] + 1);
886 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 MultiVector (
const Teuchos::RCP<const map_type>& map,
889 const Teuchos::ArrayView<const Scalar>& data,
891 const size_t numVecs) :
894 typedef LocalOrdinal LO;
895 typedef GlobalOrdinal GO;
896 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
902 const size_t lclNumRows =
903 map.is_null () ? size_t (0) : map->getLocalNumElements ();
904 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
905 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
906 "map->getLocalNumElements() = " << lclNumRows <<
".");
908 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
910 (
static_cast<size_t> (data.size ()) < minNumEntries,
911 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
912 "entries, given the input Map and number of vectors in the MultiVector."
913 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
914 "map->getLocalNumElements () = " << minNumEntries <<
".");
917 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
929 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
930 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
931 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
936 const size_t outStride =
937 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
938 if (LDA == outStride) {
945 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
947 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
949 for (
size_t j = 0; j < numVecs; ++j) {
950 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
951 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
958 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
960 MultiVector (
const Teuchos::RCP<const map_type>& map,
961 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
962 const size_t numVecs) :
966 typedef LocalOrdinal LO;
967 typedef GlobalOrdinal GO;
968 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
971 const size_t lclNumRows =
972 map.is_null () ? size_t (0) : map->getLocalNumElements ();
973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
974 (numVecs < 1 || numVecs !=
static_cast<size_t> (ArrayOfPtrs.size ()),
975 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
976 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
977 for (
size_t j = 0; j < numVecs; ++j) {
978 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
979 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
980 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
981 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
982 << X_j_av.size () <<
" < map->getLocalNumElements() = " << lclNumRows
986 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
991 using array_layout =
typename decltype (X_out)::array_layout;
992 using input_col_view_type =
typename Kokkos::View<
const IST*,
995 Kokkos::MemoryUnmanaged>;
997 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
998 for (
size_t j = 0; j < numVecs; ++j) {
999 Teuchos::ArrayView<const IST> X_j_av =
1000 Teuchos::av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
1001 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
1002 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
1008 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 if (this->
getMap ().is_null ()) {
1020 return static_cast<size_t> (0);
1022 return this->
getMap ()->getLocalNumElements ();
1026 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1031 if (this->
getMap ().is_null ()) {
1032 return static_cast<size_t> (0);
1034 return this->
getMap ()->getGlobalNumElements ();
1038 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1046 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1052 auto thisData =
view_.getDualView().view_host().data();
1053 auto otherData = other.
view_.getDualView().view_host().data();
1054 size_t thisSpan =
view_.getDualView().view_host().span();
1055 size_t otherSpan = other.
view_.getDualView().view_host().span();
1056 return (otherData <= thisData && thisData < otherData + otherSpan)
1057 || (thisData <= otherData && otherData < thisData + thisSpan);
1060 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1069 const MV* src =
dynamic_cast<const MV*
> (&sourceObj);
1070 if (src ==
nullptr) {
1084 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1091 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1096 const size_t numSameIDs,
1097 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
1098 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
1106 using KokkosRefactor::Details::permute_array_multi_column;
1107 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
1108 using Kokkos::Compat::create_const_view;
1112 MV& sourceMV =
const_cast<MV &
>(
dynamic_cast<const MV&
> (sourceObj));
1113 const bool copyOnHost = runKernelOnHost(sourceMV);
1114 const char longFuncNameHost[] =
"Tpetra::MultiVector::copyAndPermute[Host]";
1115 const char longFuncNameDevice[] =
"Tpetra::MultiVector::copyAndPermute[Device]";
1116 const char tfecfFuncName[] =
"copyAndPermute: ";
1117 ProfilingRegion regionCAP (copyOnHost ? longFuncNameHost : longFuncNameDevice);
1119 const bool verbose = Behavior::verbose ();
1120 std::unique_ptr<std::string> prefix;
1122 auto map = this->
getMap ();
1123 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1124 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1125 std::ostringstream os;
1126 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
1127 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1128 os <<
"Start" << endl;
1129 std::cerr << os.str ();
1132 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1133 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
1134 std::logic_error,
"permuteToLIDs.extent(0) = "
1135 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
1136 << permuteFromLIDs.extent (0) <<
".");
1141 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1142 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1143 std::logic_error,
"Input MultiVector needs sync to both host "
1146 std::ostringstream os;
1147 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1148 std::cerr << os.str ();
1152 std::ostringstream os;
1153 os << *prefix <<
"Copy" << endl;
1154 std::cerr << os.str ();
1179 if (numSameIDs > 0) {
1180 const std::pair<size_t, size_t> rows (0, numSameIDs);
1182 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1183 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1185 for (
size_t j = 0; j < numCols; ++j) {
1186 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1187 const size_t srcCol =
1188 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1190 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1191 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1195 Kokkos::RangePolicy<execution_space, size_t>;
1196 range_t rp(space, 0,numSameIDs);
1197 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1199 Kokkos::parallel_for(rp, aaf);
1204 Kokkos::deep_copy (space, tgt_j, src_j);
1210 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1211 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1213 for (
size_t j = 0; j < numCols; ++j) {
1214 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1215 const size_t srcCol =
1216 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1218 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1219 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1223 Kokkos::RangePolicy<execution_space, size_t>;
1224 range_t rp(space, 0,numSameIDs);
1225 Tpetra::Details::AddAssignFunctor<
decltype(tgt_j),
decltype(src_j)>
1227 Kokkos::parallel_for(rp, aaf);
1232 Kokkos::deep_copy (space, tgt_j, src_j);
1250 if (permuteFromLIDs.extent (0) == 0 ||
1251 permuteToLIDs.extent (0) == 0) {
1253 std::ostringstream os;
1254 os << *prefix <<
"No permutations. Done!" << endl;
1255 std::cerr << os.str ();
1261 std::ostringstream os;
1262 os << *prefix <<
"Permute" << endl;
1263 std::cerr << os.str ();
1268 const bool nonConstStride =
1269 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1272 std::ostringstream os;
1273 os << *prefix <<
"nonConstStride="
1274 << (nonConstStride ?
"true" :
"false") << endl;
1275 std::cerr << os.str ();
1282 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1283 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1284 if (nonConstStride) {
1286 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1287 tmpTgt.modify_host ();
1288 for (
size_t j = 0; j < numCols; ++j) {
1289 tmpTgt.view_host()(j) = j;
1292 tmpTgt.sync_device ();
1294 tgtWhichVecs = tmpTgt;
1297 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->
whichVectors_ ();
1299 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1303 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1304 (
static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1305 this->getNumVectors (),
1306 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1307 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1308 this->getNumVectors () <<
".");
1310 if (sourceMV.whichVectors_.size () == 0) {
1311 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1312 tmpSrc.modify_host ();
1313 for (
size_t j = 0; j < numCols; ++j) {
1314 tmpSrc.view_host()(j) = j;
1317 tmpSrc.sync_device ();
1319 srcWhichVecs = tmpSrc;
1322 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1323 sourceMV.whichVectors_ ();
1325 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1329 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1330 (
static_cast<size_t> (srcWhichVecs.extent (0)) !=
1331 sourceMV.getNumVectors (), std::logic_error,
1332 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1333 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1339 std::ostringstream os;
1340 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1341 std::cerr << os.str ();
1343 auto tgt_h = this->getLocalViewHost(Access::ReadWrite);
1344 auto src_h = sourceMV.getLocalViewHost(Access::ReadOnly);
1346 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1347 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1348 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1349 auto permuteFromLIDs_h =
1350 create_const_view (permuteFromLIDs.view_host ());
1353 std::ostringstream os;
1354 os << *prefix <<
"Permute on host" << endl;
1355 std::cerr << os.str ();
1357 if (nonConstStride) {
1360 auto tgtWhichVecs_h =
1361 create_const_view (tgtWhichVecs.view_host ());
1362 auto srcWhichVecs_h =
1363 create_const_view (srcWhichVecs.view_host ());
1365 using op_type = KokkosRefactor::Details::AddOp;
1366 permute_array_multi_column_variable_stride (tgt_h, src_h,
1370 srcWhichVecs_h, numCols,
1374 using op_type = KokkosRefactor::Details::InsertOp;
1375 permute_array_multi_column_variable_stride (tgt_h, src_h,
1379 srcWhichVecs_h, numCols,
1385 using op_type = KokkosRefactor::Details::AddOp;
1386 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1387 permuteFromLIDs_h, numCols, op_type());
1390 using op_type = KokkosRefactor::Details::InsertOp;
1391 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1392 permuteFromLIDs_h, numCols, op_type());
1398 std::ostringstream os;
1399 os << *prefix <<
"Get permute LIDs on device" << endl;
1400 std::cerr << os.str ();
1402 auto tgt_d = this->getLocalViewDevice(Access::ReadWrite);
1403 auto src_d = sourceMV.getLocalViewDevice(Access::ReadOnly);
1405 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1406 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1407 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1408 auto permuteFromLIDs_d =
1409 create_const_view (permuteFromLIDs.view_device ());
1412 std::ostringstream os;
1413 os << *prefix <<
"Permute on device" << endl;
1414 std::cerr << os.str ();
1416 if (nonConstStride) {
1419 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1420 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1422 using op_type = KokkosRefactor::Details::AddOp;
1423 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1427 srcWhichVecs_d, numCols,
1431 using op_type = KokkosRefactor::Details::InsertOp;
1432 permute_array_multi_column_variable_stride (space, tgt_d, src_d,
1436 srcWhichVecs_d, numCols,
1442 using op_type = KokkosRefactor::Details::AddOp;
1443 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1444 permuteFromLIDs_d, numCols, op_type());
1447 using op_type = KokkosRefactor::Details::InsertOp;
1448 permute_array_multi_column (space, tgt_d, src_d, permuteToLIDs_d,
1449 permuteFromLIDs_d, numCols, op_type());
1455 std::ostringstream os;
1456 os << *prefix <<
"Done!" << endl;
1457 std::cerr << os.str ();
1462template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1465 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1467 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1470 copyAndPermute(sourceObj, numSameIDs, permuteToLIDs, permuteFromLIDs, CM,
1475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1480 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1481 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1482 Kokkos::DualView<size_t*, buffer_device_type> ,
1483 size_t& constantNumPackets,
1489 using Kokkos::Compat::create_const_view;
1490 using Kokkos::Compat::getKokkosViewDeepCopy;
1495 MV& sourceMV =
const_cast<MV&
>(
dynamic_cast<const MV&
> (sourceObj));
1497 const bool packOnHost = runKernelOnHost(sourceMV);
1498 const char longFuncNameHost[] =
"Tpetra::MultiVector::packAndPrepare[Host]";
1499 const char longFuncNameDevice[] =
"Tpetra::MultiVector::packAndPrepare[Device]";
1500 const char tfecfFuncName[] =
"packAndPrepare: ";
1501 ProfilingRegion regionPAP (packOnHost ? longFuncNameHost : longFuncNameDevice);
1509 const bool debugCheckIndices = Behavior::debug ();
1514 const bool printDebugOutput = Behavior::verbose ();
1516 std::unique_ptr<std::string> prefix;
1517 if (printDebugOutput) {
1518 auto map = this->getMap ();
1519 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1520 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1521 std::ostringstream os;
1522 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1523 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1524 os <<
"Start" << endl;
1525 std::cerr << os.str ();
1529 const size_t numCols = sourceMV.getNumVectors ();
1534 constantNumPackets = numCols;
1538 if (exportLIDs.extent (0) == 0) {
1539 if (printDebugOutput) {
1540 std::ostringstream os;
1541 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1542 std::cerr << os.str ();
1562 const size_t numExportLIDs = exportLIDs.extent (0);
1563 const size_t newExportsSize = numCols * numExportLIDs;
1564 if (printDebugOutput) {
1565 std::ostringstream os;
1566 os << *prefix <<
"realloc: "
1567 <<
"numExportLIDs: " << numExportLIDs
1568 <<
", exports.extent(0): " << exports.extent (0)
1569 <<
", newExportsSize: " << newExportsSize << std::endl;
1570 std::cerr << os.str ();
1572 reallocDualViewIfNeeded (exports, newExportsSize,
"exports");
1576 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1577 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1578 std::logic_error,
"Input MultiVector needs sync to both host "
1580 if (printDebugOutput) {
1581 std::ostringstream os;
1582 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1583 std::cerr << os.str ();
1595 exports.clear_sync_state ();
1596 exports.modify_host ();
1604 exports.clear_sync_state ();
1605 exports.modify_device ();
1621 if (sourceMV.isConstantStride ()) {
1622 using KokkosRefactor::Details::pack_array_single_column;
1623 if (printDebugOutput) {
1624 std::ostringstream os;
1625 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1626 std::cerr << os.str ();
1629 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1630 pack_array_single_column (exports.view_host (),
1632 exportLIDs.view_host (),
1637 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1638 pack_array_single_column (exports.view_device (),
1640 exportLIDs.view_device (),
1647 using KokkosRefactor::Details::pack_array_single_column;
1648 if (printDebugOutput) {
1649 std::ostringstream os;
1650 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1651 std::cerr << os.str ();
1654 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1655 pack_array_single_column (exports.view_host (),
1657 exportLIDs.view_host (),
1658 sourceMV.whichVectors_[0],
1662 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1663 pack_array_single_column (exports.view_device (),
1665 exportLIDs.view_device (),
1666 sourceMV.whichVectors_[0],
1673 if (sourceMV.isConstantStride ()) {
1674 using KokkosRefactor::Details::pack_array_multi_column;
1675 if (printDebugOutput) {
1676 std::ostringstream os;
1677 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1678 std::cerr << os.str ();
1681 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1682 pack_array_multi_column (exports.view_host (),
1684 exportLIDs.view_host (),
1689 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1690 pack_array_multi_column (exports.view_device (),
1692 exportLIDs.view_device (),
1699 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1700 if (printDebugOutput) {
1701 std::ostringstream os;
1702 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1704 std::cerr << os.str ();
1710 using DV = Kokkos::DualView<IST*, device_type>;
1711 using HES =
typename DV::t_host::execution_space;
1712 using DES =
typename DV::t_dev::execution_space;
1713 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1715 auto src_host = sourceMV.getLocalViewHost(Access::ReadOnly);
1716 pack_array_multi_column_variable_stride
1717 (exports.view_host (),
1719 exportLIDs.view_host (),
1720 getKokkosViewDeepCopy<HES> (whichVecs),
1725 auto src_dev = sourceMV.getLocalViewDevice(Access::ReadOnly);
1726 pack_array_multi_column_variable_stride
1727 (exports.view_device (),
1729 exportLIDs.view_device (),
1730 getKokkosViewDeepCopy<DES> (whichVecs),
1732 debugCheckIndices, space);
1737 if (printDebugOutput) {
1738 std::ostringstream os;
1739 os << *prefix <<
"Done!" << endl;
1740 std::cerr << os.str ();
1746 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1751 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1752 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1753 Kokkos::DualView<size_t*, buffer_device_type> numExportPacketsPerLID,
1754 size_t& constantNumPackets) {
1755 packAndPrepare(sourceObj, exportLIDs, exports, numExportPacketsPerLID, constantNumPackets,
execution_space());
1759 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1761 typename std::enable_if<std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1762 typename NO::device_type::memory_space>::value,
1767 const std::string* prefix,
1768 const bool areRemoteLIDsContiguous,
1777 std::ostringstream os;
1778 os << *prefix <<
"Realloc (if needed) imports_ from "
1779 << this->
imports_.extent (0) <<
" to " << newSize << std::endl;
1780 std::cerr << os.str ();
1783 bool reallocated =
false;
1786 using DV = Kokkos::DualView<IST*, Kokkos::LayoutLeft, buffer_device_type>;
1798 if ((std::is_same_v<typename dual_view_type::t_dev::device_type, typename dual_view_type::t_host::device_type> ||
1801 areRemoteLIDsContiguous &&
1803 (getNumVectors() == 1) &&
1806 size_t offset = getLocalLength () - newSize;
1807 reallocated = this->imports_.view_device().data() != view_.getDualView().view_device().data() + offset;
1809 typedef std::pair<size_t, size_t> range_type;
1810 this->imports_ = DV(view_.getDualView(),
1811 range_type (offset, getLocalLength () ),
1815 std::ostringstream os;
1816 os << *prefix <<
"Aliased imports_ to MV.view_" << std::endl;
1817 std::cerr << os.str ();
1827 std::ostringstream os;
1828 os << *prefix <<
"Finished realloc'ing unaliased_imports_" << std::endl;
1829 std::cerr << os.str ();
1831 this->imports_ = this->unaliased_imports_;
1836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1838 typename std::enable_if<!std::is_same<typename Tpetra::Details::DefaultTypes::CommBufferMemorySpace<typename NO::execution_space>::type,
1839 typename NO::device_type::memory_space>::value,
1844 const std::string* prefix,
1845 const bool areRemoteLIDsContiguous,
1851 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1856 const std::string* prefix,
1857 const bool areRemoteLIDsContiguous,
1860 return reallocImportsIfNeededImpl<Node>(newSize, verbose, prefix, areRemoteLIDsContiguous, CM);
1863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1867 return (this->
imports_.view_device().data() + this->imports_.view_device().extent(0) ==
1868 view_.getDualView().view_device().data() +
view_.getDualView().view_device().extent(0));
1872 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1876 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1877 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1878 Kokkos::DualView<size_t*, buffer_device_type> ,
1879 const size_t constantNumPackets,
1885 using KokkosRefactor::Details::unpack_array_multi_column;
1886 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1887 using Kokkos::Compat::getKokkosViewDeepCopy;
1890 const bool unpackOnHost = runKernelOnHost(imports);
1892 const char longFuncNameHost[] =
"Tpetra::MultiVector::unpackAndCombine[Host]";
1893 const char longFuncNameDevice[] =
"Tpetra::MultiVector::unpackAndCombine[Device]";
1894 const char * longFuncName = unpackOnHost ? longFuncNameHost : longFuncNameDevice;
1895 const char tfecfFuncName[] =
"unpackAndCombine: ";
1896 ProfilingRegion regionUAC (longFuncName);
1904 const bool debugCheckIndices = Behavior::debug ();
1906 const bool printDebugOutput = Behavior::verbose ();
1907 std::unique_ptr<std::string> prefix;
1908 if (printDebugOutput) {
1909 auto map = this->
getMap ();
1910 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1911 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1912 std::ostringstream os;
1913 os <<
"Proc " << myRank <<
": " << longFuncName <<
": ";
1914 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1915 os <<
"Start" << endl;
1916 std::cerr << os.str ();
1920 if (importLIDs.extent (0) == 0) {
1921 if (printDebugOutput) {
1922 std::ostringstream os;
1923 os << *prefix <<
"No imports. Done!" << endl;
1924 std::cerr << os.str ();
1930 if (importsAreAliased()) {
1931 if (printDebugOutput) {
1932 std::ostringstream os;
1933 os << *prefix <<
"Skipping unpack, since imports_ is aliased to MV.view_. Done!" << endl;
1934 std::cerr << os.str ();
1940 const size_t numVecs = getNumVectors ();
1941 if (debugCheckIndices) {
1942 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1943 (
static_cast<size_t> (imports.extent (0)) !=
1944 numVecs * importLIDs.extent (0),
1946 "imports.extent(0) = " << imports.extent (0)
1947 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1948 <<
" * " << importLIDs.extent (0) <<
" = "
1949 << numVecs * importLIDs.extent (0) <<
".");
1951 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1952 (constantNumPackets ==
static_cast<size_t> (0), std::runtime_error,
1953 "constantNumPackets input argument must be nonzero.");
1955 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1956 (
static_cast<size_t> (numVecs) !=
1957 static_cast<size_t> (constantNumPackets),
1958 std::runtime_error,
"constantNumPackets must equal numVecs.");
1967 if (this->imports_.need_sync_host()) this->imports_.sync_host();
1970 if (this->imports_.need_sync_device()) this->imports_.sync_device();
1973 if (printDebugOutput) {
1974 std::ostringstream os;
1975 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1977 std::cerr << os.str ();
1982 auto imports_d = imports.view_device ();
1983 auto imports_h = imports.view_host ();
1984 auto importLIDs_d = importLIDs.view_device ();
1985 auto importLIDs_h = importLIDs.view_host ();
1987 Kokkos::DualView<size_t*, device_type> whichVecs;
1989 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1990 Kokkos::MemoryUnmanaged> whichVecsIn (
whichVectors_.getRawPtr (),
1992 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1994 whichVecs.modify_host ();
1996 Kokkos::deep_copy (whichVecs.view_host (), whichVecsIn);
1999 whichVecs.modify_device ();
2001 Kokkos::deep_copy (whichVecs.view_device (), whichVecsIn);
2004 auto whichVecs_d = whichVecs.view_device ();
2005 auto whichVecs_h = whichVecs.view_host ();
2015 if (numVecs > 0 && importLIDs.extent (0) > 0) {
2016 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
2017 using host_exec_space =
typename dual_view_type::t_host::execution_space;
2020 const bool use_atomic_updates = unpackOnHost ?
2021 host_exec_space().concurrency () != 1 :
2022 dev_exec_space().concurrency () != 1;
2024 if (printDebugOutput) {
2025 std::ostringstream os;
2027 std::cerr << os.str ();
2034 using op_type = KokkosRefactor::Details::InsertOp;
2038 unpack_array_multi_column (host_exec_space (),
2039 X_h, imports_h, importLIDs_h,
2040 op_type (), numVecs,
2047 unpack_array_multi_column (space,
2048 X_d, imports_d, importLIDs_d,
2049 op_type (), numVecs,
2056 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2057 unpack_array_multi_column_variable_stride (host_exec_space (),
2067 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2068 unpack_array_multi_column_variable_stride (space,
2080 using op_type = KokkosRefactor::Details::AddOp;
2081 if (isConstantStride ()) {
2083 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2084 unpack_array_multi_column (host_exec_space (),
2085 X_h, imports_h, importLIDs_h,
2086 op_type (), numVecs,
2091 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2092 unpack_array_multi_column (space,
2093 X_d, imports_d, importLIDs_d,
2094 op_type (), numVecs,
2101 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2102 unpack_array_multi_column_variable_stride (host_exec_space (),
2112 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2113 unpack_array_multi_column_variable_stride (space,
2125 using op_type = KokkosRefactor::Details::AbsMaxOp;
2126 if (isConstantStride ()) {
2129 unpack_array_multi_column (host_exec_space (),
2130 X_h, imports_h, importLIDs_h,
2131 op_type (), numVecs,
2136 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2137 unpack_array_multi_column (space,
2138 X_d, imports_d, importLIDs_d,
2139 op_type (), numVecs,
2146 auto X_h = this->getLocalViewHost(Access::ReadWrite);
2147 unpack_array_multi_column_variable_stride (host_exec_space (),
2157 auto X_d = this->getLocalViewDevice(Access::ReadWrite);
2158 unpack_array_multi_column_variable_stride (space,
2170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2171 (
true, std::logic_error,
"Invalid CombineMode");
2175 if (printDebugOutput) {
2176 std::ostringstream os;
2177 os << *prefix <<
"Nothing to unpack" << endl;
2178 std::cerr << os.str ();
2182 if (printDebugOutput) {
2183 std::ostringstream os;
2184 os << *prefix <<
"Done!" << endl;
2185 std::cerr << os.str ();
2190 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2194 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
2195 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
2196 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
2197 const size_t constantNumPackets,
2199 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets, CM,
execution_space());
2203 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2209 return static_cast<size_t> (
view_.extent (1));
2219 gblDotImpl (
const RV& dotsOut,
2220 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2221 const bool distributed)
2223 using Teuchos::REDUCE_MAX;
2224 using Teuchos::REDUCE_SUM;
2225 using Teuchos::reduceAll;
2226 typedef typename RV::non_const_value_type dot_type;
2228 const size_t numVecs = dotsOut.extent (0);
2243 if (distributed && ! comm.is_null ()) {
2246 const int nv =
static_cast<int> (numVecs);
2249 if (commIsInterComm) {
2253 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
2255 Kokkos::deep_copy (lclDots, dotsOut);
2256 const dot_type*
const lclSum = lclDots.data ();
2257 dot_type*
const gblSum = dotsOut.data ();
2258 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2261 dot_type*
const inout = dotsOut.data ();
2262 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
2268 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2272 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
2275 using Kokkos::subview;
2276 using Teuchos::Comm;
2277 using Teuchos::null;
2280 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
2281 typedef typename dual_view_type::t_dev_const XMV;
2282 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
2290 const size_t lclNumRows = this->getLocalLength ();
2291 const size_t numDots =
static_cast<size_t> (dots.extent (0));
2292 const bool debug = Behavior::debug ();
2295 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
2296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2297 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
2298 "compatible with the input MultiVector A. We only test for this "
2309 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2311 "MultiVectors do not have the same local length. "
2312 "this->getLocalLength() = " << lclNumRows <<
" != "
2314 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2316 "MultiVectors must have the same number of columns (vectors). "
2317 "this->getNumVectors() = " << numVecs <<
" != "
2319 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2320 numDots != numVecs, std::runtime_error,
2321 "The output array 'dots' must have the same number of entries as the "
2322 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2323 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2325 const std::pair<size_t, size_t> colRng (0, numVecs);
2326 RV dotsOut = subview (dots, colRng);
2327 RCP<const Comm<int> > comm = this->
getMap ().is_null () ? null :
2328 this->
getMap ()->getComm ();
2333 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
2345 exec_space_instance.fence();
2347 gblDotImpl (dotsOut, comm, this->isDistributed ());
2351 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2358 using dot_type =
typename MV::dot_type;
2359 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
2362 Teuchos::RCP<const Teuchos::Comm<int> > comm =
2363 map.is_null () ? Teuchos::null : map->getComm ();
2364 if (comm.is_null ()) {
2365 return Kokkos::ArithTraits<dot_type>::zero ();
2368 using LO = LocalOrdinal;
2372 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
2374 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
2375 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
2376 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
2383 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
2385 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
2386 lclDot = KokkosBlas::dot (x_1d, y_1d);
2389 using Teuchos::outArg;
2390 using Teuchos::REDUCE_SUM;
2391 using Teuchos::reduceAll;
2392 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2404 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2408 const Teuchos::ArrayView<dot_type>& dots)
const
2410 const char tfecfFuncName[] =
"dot: ";
2415 const size_t numDots =
static_cast<size_t> (dots.size ());
2425 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2427 "MultiVectors do not have the same local length. "
2428 "this->getLocalLength() = " << lclNumRows <<
" != "
2430 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2432 "MultiVectors must have the same number of columns (vectors). "
2433 "this->getNumVectors() = " << numVecs <<
" != "
2435 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2436 (numDots != numVecs, std::runtime_error,
2437 "The output array 'dots' must have the same number of entries as the "
2438 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2439 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2442 const dot_type gblDot = multiVectorSingleColumnDot (*
this, A);
2446 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2453 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2456 using ::Tpetra::Details::NORM_TWO;
2458 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2461 MV& X =
const_cast<MV&
> (*this);
2462 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2465 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2468 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2470 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2471 this->
norm2 (norms_av);
2475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2478 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2481 using ::Tpetra::Details::NORM_ONE;
2483 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2486 MV& X =
const_cast<MV&
> (*this);
2487 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2490 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2493 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2495 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2496 this->
norm1 (norms_av);
2499 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2502 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2505 using ::Tpetra::Details::NORM_INF;
2507 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2510 MV& X =
const_cast<MV&
> (*this);
2511 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2514 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2517 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2519 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2526 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2530 using Kokkos::subview;
2531 using Teuchos::Comm;
2533 using Teuchos::reduceAll;
2534 using Teuchos::REDUCE_SUM;
2535 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2539 const size_t numMeans =
static_cast<size_t> (means.size ());
2541 TEUCHOS_TEST_FOR_EXCEPTION(
2542 numMeans != numVecs, std::runtime_error,
2543 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2544 <<
" != this->getNumVectors() = " << numVecs <<
".");
2546 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2547 const std::pair<size_t, size_t> colRng (0, numVecs);
2552 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2554 typename local_view_type::HostMirror::array_layout,
2556 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2557 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2559 RCP<const Comm<int> > comm = this->
getMap ().is_null () ? Teuchos::null :
2560 this->
getMap ()->getComm ();
2564 if (useHostVersion) {
2567 rowRng, Kokkos::ALL ());
2569 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2571 KokkosBlas::sum (lclSums, X_lcl);
2574 for (
size_t j = 0; j < numVecs; ++j) {
2576 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2583 if (! comm.is_null () && this->isDistributed ()) {
2584 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2585 lclSums.data (), meansOut.data ());
2589 Kokkos::deep_copy (meansOut, lclSums);
2595 rowRng, Kokkos::ALL ());
2598 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2600 KokkosBlas::sum (lclSums, X_lcl);
2603 for (
size_t j = 0; j < numVecs; ++j) {
2605 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2615 exec_space_instance.fence();
2621 if (! comm.is_null () && this->isDistributed ()) {
2622 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2623 lclSums.data (), meansOut.data ());
2627 Kokkos::deep_copy (meansOut, lclSums);
2636 for (
size_t k = 0; k < numMeans; ++k) {
2637 meansOut(k) = meansOut(k) * OneOverN;
2642 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2648 typedef Kokkos::ArithTraits<IST> ATS;
2649 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2650 typedef typename pool_type::generator_type generator_type;
2652 const IST max = Kokkos::rand<generator_type, IST>::max ();
2653 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2655 this->
randomize (
static_cast<Scalar
> (min),
static_cast<Scalar
> (max));
2659 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2662 randomize (
const Scalar& minVal,
const Scalar& maxVal)
2665 typedef Tpetra::Details::Static_Random_XorShift64_Pool<typename device_type::execution_space> tpetra_pool_type;
2666 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2669 if(!tpetra_pool_type::isSet())
2670 tpetra_pool_type::resetPool(this->
getMap()->getComm()->getRank());
2672 pool_type & rand_pool = tpetra_pool_type::getPool();
2673 const IST max =
static_cast<IST
> (maxVal);
2674 const IST min =
static_cast<IST
> (minVal);
2679 Kokkos::fill_random (thisView, rand_pool, min, max);
2683 for (
size_t k = 0; k < numVecs; ++k) {
2685 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2686 Kokkos::fill_random (X_k, rand_pool, min, max);
2691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2698 using DES =
typename dual_view_type::t_dev::execution_space;
2699 using HES =
typename dual_view_type::t_host::execution_space;
2700 using LO = LocalOrdinal;
2701 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2707 const LO numVecs =
static_cast<LO
> (this->
getNumVectors ());
2713 const bool runOnHost = runKernelOnHost(*
this);
2716 if (this->isConstantStride ()) {
2717 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2720 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2726 if (this->isConstantStride ()) {
2727 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2730 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2737 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2740 replaceMap (
const Teuchos::RCP<const map_type>& newMap)
2742 using Teuchos::ArrayRCP;
2743 using Teuchos::Comm;
2746 using LO = LocalOrdinal;
2747 using GO = GlobalOrdinal;
2753 TEUCHOS_TEST_FOR_EXCEPTION(
2754 ! this->isConstantStride (), std::logic_error,
2755 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2756 "if the MultiVector is a column view of another MultiVector (that is, if "
2757 "isConstantStride() == false).");
2787 if (this->
getMap ().is_null ()) {
2792 TEUCHOS_TEST_FOR_EXCEPTION(
2793 newMap.is_null (), std::invalid_argument,
2794 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2795 "This probably means that the input Map is incorrect.");
2799 const size_t newNumRows = newMap->getLocalNumElements ();
2800 const size_t origNumRows =
view_.extent (0);
2803 if (origNumRows != newNumRows ||
view_.extent (1) != numCols) {
2804 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2807 else if (newMap.is_null ()) {
2810 const size_t newNumRows =
static_cast<size_t> (0);
2812 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2815 this->
map_ = newMap;
2818 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2821 scale (
const Scalar& alpha)
2826 const IST theAlpha =
static_cast<IST
> (alpha);
2827 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2832 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2833 const std::pair<size_t, size_t> colRng (0, numVecs);
2842 if (useHostVersion) {
2843 auto Y_lcl = Kokkos::subview (
getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2845 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2848 for (
size_t k = 0; k < numVecs; ++k) {
2850 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2851 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2856 auto Y_lcl = Kokkos::subview (
getLocalViewDevice(Access::ReadWrite), rowRng, ALL ());
2858 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2861 for (
size_t k = 0; k < numVecs; ++k) {
2863 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2864 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2871 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2874 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2877 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2878 TEUCHOS_TEST_FOR_EXCEPTION(
2879 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2880 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2884 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2885 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2886 k_alphas.modify_host ();
2887 for (
size_t i = 0; i < numAlphas; ++i) {
2890 k_alphas.sync_device ();
2892 this->
scale (k_alphas.view_device ());
2895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2898 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2901 using Kokkos::subview;
2905 TEUCHOS_TEST_FOR_EXCEPTION(
2906 static_cast<size_t> (alphas.extent (0)) != numVecs,
2907 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2908 "alphas.extent(0) = " << alphas.extent (0)
2909 <<
" != this->getNumVectors () = " << numVecs <<
".");
2910 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2911 const std::pair<size_t, size_t> colRng (0, numVecs);
2922 if (useHostVersion) {
2925 auto alphas_h = Kokkos::create_mirror_view (alphas);
2927 Kokkos::deep_copy (alphas_h, alphas);
2929 auto Y_lcl = subview (this->
getLocalViewHost(Access::ReadWrite), rowRng, ALL ());
2931 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2934 for (
size_t k = 0; k < numVecs; ++k) {
2935 const size_t Y_col = this->isConstantStride () ? k :
2937 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2940 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2947 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2954 auto alphas_h = Kokkos::create_mirror_view (alphas);
2956 Kokkos::deep_copy (alphas_h, alphas);
2958 for (
size_t k = 0; k < numVecs; ++k) {
2959 const size_t Y_col = this->isConstantStride () ? k :
2961 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2962 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2968 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2971 scale (
const Scalar& alpha,
2975 using Kokkos::subview;
2976 const char tfecfFuncName[] =
"scale: ";
2981 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2983 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2985 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2987 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2991 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2992 const std::pair<size_t, size_t> colRng (0, numVecs);
2996 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2997 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
3000 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
3004 for (
size_t k = 0; k < numVecs; ++k) {
3005 const size_t Y_col = this->isConstantStride () ? k : this->
whichVectors_[k];
3007 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3008 auto X_k = subview (X_lcl, ALL (), X_col);
3010 KokkosBlas::scal (Y_k, theAlpha, X_k);
3017 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3022 const char tfecfFuncName[] =
"reciprocal: ";
3024 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3026 "MultiVectors do not have the same local length. "
3029 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3030 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3031 ": MultiVectors do not have the same number of columns (vectors). "
3033 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3041 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
3045 using Kokkos::subview;
3046 for (
size_t k = 0; k < numVecs; ++k) {
3048 auto vector_k = subview (this_view_dev, ALL (), this_col);
3050 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3051 KokkosBlas::reciprocal (vector_k, vector_Ak);
3056 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3061 const char tfecfFuncName[] =
"abs";
3063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3065 ": MultiVectors do not have the same local length. "
3068 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3069 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3070 ": MultiVectors do not have the same number of columns (vectors). "
3072 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3079 KokkosBlas::abs (this_view_dev, A_view_dev);
3083 using Kokkos::subview;
3085 for (
size_t k=0; k < numVecs; ++k) {
3087 auto vector_k = subview (this_view_dev, ALL (), this_col);
3089 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
3090 KokkosBlas::abs (vector_k, vector_Ak);
3095 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3098 update (
const Scalar& alpha,
3102 const char tfecfFuncName[] =
"update: ";
3103 using Kokkos::subview;
3112 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3114 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
3116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3118 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
3124 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3125 const std::pair<size_t, size_t> colRng (0, numVecs);
3128 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3130 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3134 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3138 for (
size_t k = 0; k < numVecs; ++k) {
3139 const size_t Y_col = this->isConstantStride () ? k : this->
whichVectors_[k];
3141 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3142 auto X_k = subview (X_lcl, ALL (), X_col);
3144 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3149 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3152 update (
const Scalar& alpha,
3156 const Scalar& gamma)
3159 using Kokkos::subview;
3161 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3171 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
3172 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3176 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
3177 "row(s), but this MultiVector has " << lclNumRows <<
" local "
3179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3181 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
3182 "but this MultiVector has " << numVecs <<
" column(s).");
3183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3185 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
3186 "but this MultiVector has " << numVecs <<
" column(s).");
3193 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3194 const std::pair<size_t, size_t> colRng (0, numVecs);
3204 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3209 for (
size_t k = 0; k < numVecs; ++k) {
3213 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3214 theBeta, subview (B_lcl, rowRng, B_col),
3215 theGamma, subview (C_lcl, rowRng, this_col));
3220 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3221 Teuchos::ArrayRCP<const Scalar>
3227 const char tfecfFuncName[] =
"getData: ";
3236 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3237 Teuchos::ArrayRCP<const IST> dataAsArcp =
3238 Kokkos::Compat::persistingView (hostView_j, 0,
getLocalLength ());
3240 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3241 (
static_cast<size_t> (hostView_j.extent (0)) <
3242 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3243 "hostView_j.extent(0) = " << hostView_j.extent (0)
3244 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3245 "Please report this bug to the Tpetra developers.");
3247 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3250 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3251 Teuchos::ArrayRCP<Scalar>
3256 using Kokkos::subview;
3258 const char tfecfFuncName[] =
"getDataNonConst: ";
3262 auto hostView_j = subview (hostView, ALL (), col);
3263 Teuchos::ArrayRCP<IST> dataAsArcp =
3264 Kokkos::Compat::persistingView (hostView_j, 0,
getLocalLength ());
3266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3267 (
static_cast<size_t> (hostView_j.extent (0)) <
3268 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3269 "hostView_j.extent(0) = " << hostView_j.extent (0)
3270 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3271 "Please report this bug to the Tpetra developers.");
3273 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3277 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3279 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3286 bool contiguous =
true;
3287 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3288 for (
size_t j = 1; j < numCopyVecs; ++j) {
3289 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3294 if (contiguous && numCopyVecs > 0) {
3295 return this->
subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3298 RCP<const MV> X_sub = this->
subView (cols);
3299 RCP<MV> Y (
new MV (this->
getMap (), numCopyVecs,
false));
3305 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3306 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3308 subCopy (
const Teuchos::Range1D &colRng)
const
3313 RCP<const MV> X_sub = this->
subView (colRng);
3314 RCP<MV> Y (
new MV (this->
getMap (),
static_cast<size_t> (colRng.size ()),
false));
3319 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3323 return view_.origExtent(0);
3326 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3330 return view_.origExtent(1);
3333 template <
class Scalar,
class LO,
class GO,
class Node>
3336 const Teuchos::RCP<const map_type>& subMap,
3341 using Kokkos::subview;
3342 using Teuchos::outArg;
3345 using Teuchos::reduceAll;
3346 using Teuchos::REDUCE_MIN;
3349 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3350 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3353 std::unique_ptr<std::ostringstream> errStrm;
3360 const auto comm = subMap->getComm ();
3361 TEUCHOS_ASSERT( ! comm.is_null () );
3362 const int myRank = comm->getRank ();
3364 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3366 const LO newNumRows =
static_cast<LO
> (subMap->getLocalNumElements ());
3368 std::ostringstream os;
3369 os <<
"Proc " << myRank <<
": " << prefix
3370 <<
"X: {lclNumRows: " << lclNumRowsBefore
3372 <<
", numCols: " << numCols <<
"}, "
3373 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3374 std::cerr << os.str ();
3379 const bool tooManyElts =
3382 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3383 *errStrm <<
" Proc " << myRank <<
": subMap->getLocalNumElements() (="
3384 << newNumRows <<
") + rowOffset (=" << rowOffset
3388 TEUCHOS_TEST_FOR_EXCEPTION
3389 (! debug && tooManyElts, std::invalid_argument,
3390 prefix << errStrm->str () << suffix);
3394 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3396 std::ostringstream gblErrStrm;
3397 const std::string myErrStr =
3398 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3400 TEUCHOS_TEST_FOR_EXCEPTION
3401 (
true, std::invalid_argument, gblErrStrm.str ());
3405 using range_type = std::pair<LO, LO>;
3406 const range_type origRowRng
3407 (rowOffset,
static_cast<LO
> (X.
view_.origExtent (0)));
3408 const range_type rowRng
3409 (rowOffset, rowOffset + newNumRows);
3411 wrapped_dual_view_type newView = takeSubview (X.
view_, rowRng, ALL ());
3418 if (newView.extent (0) == 0 &&
3419 newView.extent (1) != X.
view_.extent (1)) {
3421 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3425 MV (subMap, newView) :
3429 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3430 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3431 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3433 if (errStrm.get () ==
nullptr) {
3434 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3436 *errStrm <<
" Proc " << myRank <<
3437 ": subMap.getLocalNumElements(): " << newNumRows <<
3438 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3439 ", X.getNumVectors(): " << numCols <<
3440 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3442 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3444 std::ostringstream gblErrStrm;
3446 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3447 "dimensions on one or more processes:" << endl;
3449 const std::string myErrStr =
3450 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3452 gblErrStrm << suffix << endl;
3453 TEUCHOS_TEST_FOR_EXCEPTION
3454 (
true, std::invalid_argument, gblErrStrm.str ());
3459 std::ostringstream os;
3460 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3461 std::cerr << os.str ();
3467 std::ostringstream os;
3468 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3469 std::cerr << os.str ();
3473 template <
class Scalar,
class LO,
class GO,
class Node>
3477 const size_t rowOffset) :
3482 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3483 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3485 offsetView (
const Teuchos::RCP<const map_type>& subMap,
3486 const size_t offset)
const
3489 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3493 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3496 const size_t offset)
3499 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3503 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3505 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3507 using Teuchos::Array;
3511 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3512 TEUCHOS_TEST_FOR_EXCEPTION(
3513 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3514 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3515 "contain at least one entry, but cols.size() = " << cols.size ()
3520 bool contiguous =
true;
3521 for (
size_t j = 1; j < numViewCols; ++j) {
3522 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3528 if (numViewCols == 0) {
3530 return rcp (
new MV (this->
getMap (), numViewCols));
3533 return this->
subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3538 return rcp (
new MV (this->
getMap (),
view_, cols));
3541 Array<size_t> newcols (cols.size ());
3542 for (
size_t j = 0; j < numViewCols; ++j) {
3545 return rcp (
new MV (this->
getMap (),
view_, newcols ()));
3550 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3551 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3553 subView (
const Teuchos::Range1D& colRng)
const
3557 using Kokkos::subview;
3558 using Teuchos::Array;
3562 const char tfecfFuncName[] =
"subView(Range1D): ";
3569 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3570 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3571 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3574 numVecs != 0 && colRng.size () != 0 &&
3575 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3576 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3577 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3578 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3579 "[0, " << numVecs <<
"].");
3581 RCP<const MV> X_ret;
3591 const std::pair<size_t, size_t> rows (0, lclNumRows);
3592 if (colRng.size () == 0) {
3593 const std::pair<size_t, size_t> cols (0, 0);
3594 wrapped_dual_view_type X_sub = takeSubview (this->
view_, ALL (), cols);
3595 X_ret = rcp (
new MV (this->
getMap (), X_sub));
3600 const std::pair<size_t, size_t> cols (colRng.lbound (),
3601 colRng.ubound () + 1);
3602 wrapped_dual_view_type X_sub = takeSubview (this->
view_, ALL (), cols);
3603 X_ret = rcp (
new MV (this->
getMap (), X_sub));
3606 if (
static_cast<size_t> (colRng.size ()) ==
static_cast<size_t> (1)) {
3609 const std::pair<size_t, size_t> col (
whichVectors_[0] + colRng.lbound (),
3611 wrapped_dual_view_type X_sub = takeSubview (
view_, ALL (), col);
3612 X_ret = rcp (
new MV (this->
getMap (), X_sub));
3615 Array<size_t> which (
whichVectors_.begin () + colRng.lbound (),
3617 X_ret = rcp (
new MV (this->
getMap (),
view_, which));
3622 const bool debug = Behavior::debug ();
3624 using Teuchos::Comm;
3625 using Teuchos::outArg;
3626 using Teuchos::REDUCE_MIN;
3627 using Teuchos::reduceAll;
3629 RCP<const Comm<int> > comm = this->
getMap ().is_null () ?
3630 Teuchos::null : this->
getMap ()->getComm ();
3631 if (! comm.is_null ()) {
3635 if (X_ret.is_null ()) {
3638 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3640 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3641 "MultiVector; the return value of this method) is null on some MPI "
3642 "process in this MultiVector's communicator. This should never "
3643 "happen. Please report this bug to the Tpetra developers.");
3644 if (! X_ret.is_null () &&
3645 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3648 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3649 outArg (gblSuccess));
3650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3651 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3652 "colRng.size(), on at least one MPI process in this MultiVector's "
3653 "communicator. This should never happen. "
3654 "Please report this bug to the Tpetra developers.");
3661 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3662 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3667 return Teuchos::rcp_const_cast<MV> (this->
subView (cols));
3671 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3672 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3677 return Teuchos::rcp_const_cast<MV> (this->
subView (colRng));
3681 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3685 : base_type (X.
getMap ())
3687 using Kokkos::subview;
3688 typedef std::pair<size_t, size_t> range_type;
3689 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3692 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3693 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3694 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3696 static_cast<size_t> (j) :
3698 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3709 const size_t newSize = X.
imports_.extent (0) / numCols;
3710 const size_t offset = jj*newSize;
3711 auto device_view = subview (X.
imports_.view_device(),
3712 range_type (offset, offset+newSize));
3713 auto host_view = subview (X.
imports_.view_host(),
3714 range_type (offset, offset+newSize));
3718 const size_t newSize = X.
exports_.extent (0) / numCols;
3719 const size_t offset = jj*newSize;
3720 auto device_view = subview (X.
exports_.view_device(),
3721 range_type (offset, offset+newSize));
3722 auto host_view = subview (X.
exports_.view_host(),
3723 range_type (offset, offset+newSize));
3734 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3735 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3740 return Teuchos::rcp (
new V (*
this, j));
3744 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3745 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3750 return Teuchos::rcp (
new V (*
this, j));
3754 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3757 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3760 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3762 Kokkos::MemoryUnmanaged>;
3763 const char tfecfFuncName[] =
"get1dCopy: ";
3768 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3769 (LDA < numRows, std::runtime_error,
3770 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3771 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3772 (numRows >
size_t (0) && numCols >
size_t (0) &&
3773 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3775 "A.size() = " << A.size () <<
", but its size must be at least "
3776 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3778 const std::pair<size_t, size_t> rowRange (0, numRows);
3779 const std::pair<size_t, size_t> colRange (0, numCols);
3781 input_view_type A_view_orig (
reinterpret_cast<IST*
> (A.getRawPtr ()),
3783 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3799 throw std::runtime_error(
"Tpetra::MultiVector: A non-const view is alive outside and we cannot give a copy where host or device view can be modified outside");
3802 const bool useHostView =
view_.host_view_use_count() >=
view_.device_view_use_count();
3803 if (this->isConstantStride ()) {
3807 Kokkos::deep_copy (A_view, srcView_host);
3811 Kokkos::deep_copy (A_view, srcView_device);
3815 for (
size_t j = 0; j < numCols; ++j) {
3817 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3821 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3823 Kokkos::deep_copy (dstColView, srcColView_host);
3826 auto srcColView_device = Kokkos::subview (srcView_device, rowRange, srcCol);
3828 Kokkos::deep_copy (dstColView, srcColView_device);
3836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3839 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3842 const char tfecfFuncName[] =
"get2dCopy: ";
3846 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3847 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3848 std::runtime_error,
"Input array of pointers must contain as many "
3849 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3850 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3852 if (numRows != 0 && numCols != 0) {
3854 for (
size_t j = 0; j < numCols; ++j) {
3855 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3856 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3857 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3858 "the input array of arrays is not long enough to fit all entries in "
3859 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3860 <<
" < getLocalLength() = " << numRows <<
".");
3864 for (
size_t j = 0; j < numCols; ++j) {
3865 Teuchos::RCP<const V> X_j = this->
getVector (j);
3866 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3867 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3873 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3874 Teuchos::ArrayRCP<const Scalar>
3879 return Teuchos::null;
3881 TEUCHOS_TEST_FOR_EXCEPTION(
3883 "get1dView: This MultiVector does not have constant stride, so it is "
3884 "not possible to view its data as a single array. You may check "
3885 "whether a MultiVector has constant stride by calling "
3886 "isConstantStride().");
3891 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3892 Kokkos::Compat::persistingView (X_lcl);
3893 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3897 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3898 Teuchos::ArrayRCP<Scalar>
3903 return Teuchos::null;
3906 TEUCHOS_TEST_FOR_EXCEPTION
3908 "get1dViewNonConst: This MultiVector does not have constant stride, "
3909 "so it is not possible to view its data as a single array. You may "
3910 "check whether a MultiVector has constant stride by calling "
3911 "isConstantStride().");
3913 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3914 Kokkos::Compat::persistingView (X_lcl);
3915 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3920 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3932 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3934 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3935 for (
size_t j = 0; j < numCols; ++j) {
3936 const size_t col = this->isConstantStride () ? j : this->
whichVectors_[j];
3937 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3938 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3939 Kokkos::Compat::persistingView (X_lcl_j);
3940 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3945 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3950 return view_.getHostView(s);
3953 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3958 return view_.getHostView(s);
3961 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3966 return view_.getHostView(s);
3969 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3974 return view_.getDeviceView(s);
3977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3982 return view_.getDeviceView(s);
3985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3990 return view_.getDeviceView(s);
3993 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3994 typename MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::wrapped_dual_view_type
4000 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4001 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
4016 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
4018 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
4019 for (
size_t j = 0; j < numCols; ++j) {
4020 const size_t col = this->isConstantStride () ? j : this->
whichVectors_[j];
4021 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
4022 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
4023 Kokkos::Compat::persistingView (X_lcl_j);
4024 views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_lcl_j_arcp);
4029 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4033 Teuchos::ETransp transB,
4034 const Scalar& alpha,
4040 using Teuchos::CONJ_TRANS;
4041 using Teuchos::NO_TRANS;
4042 using Teuchos::TRANS;
4045 using Teuchos::rcpFromRef;
4047 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
4049 using STS = Teuchos::ScalarTraits<Scalar>;
4051 const char tfecfFuncName[] =
"multiply: ";
4052 ProfilingRegion region (
"Tpetra::MV::multiply");
4084 const bool C_is_local = ! this->isDistributed ();
4094 auto myMap = this->
getMap ();
4095 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
4096 using Teuchos::REDUCE_MIN;
4097 using Teuchos::reduceAll;
4098 using Teuchos::outArg;
4100 auto comm = myMap->getComm ();
4101 const size_t A_nrows =
4103 const size_t A_ncols =
4105 const size_t B_nrows =
4107 const size_t B_ncols =
4113 const int myRank = comm->getRank ();
4114 std::ostringstream errStrm;
4116 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
4118 <<
"." << std::endl;
4121 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
4123 <<
"." << std::endl;
4125 if (A_ncols != B_nrows) {
4126 errStrm <<
"Proc " << myRank <<
": A_ncols="
4127 << A_ncols <<
" != B_nrows=" << B_nrows
4128 <<
"." << std::endl;
4131 const int lclGood = lclBad ? 0 : 1;
4133 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4135 std::ostringstream os;
4138 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4139 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
4140 "least one process in this object's communicator." << std::endl
4142 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
4144 << (transA == Teuchos::TRANS ?
"^T" :
4145 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4146 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
4148 << (transA == Teuchos::TRANS ?
"^T" :
4149 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
4150 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
4161 const bool Case1 = C_is_local && A_is_local && B_is_local;
4163 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4164 transA != NO_TRANS &&
4167 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4171 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4172 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
4173 "Multiplication of op(A) and op(B) into *this is not a "
4174 "supported use case.");
4176 if (beta != STS::zero () && Case2) {
4182 const int myRank = this->
getMap ()->getComm ()->getRank ();
4184 beta_local = ATS::zero ();
4194 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4196 C_tmp = rcp (
this,
false);
4199 RCP<const MV> A_tmp;
4201 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4203 A_tmp = rcpFromRef (A);
4206 RCP<const MV> B_tmp;
4208 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4210 B_tmp = rcpFromRef (B);
4213 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4214 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4215 ! A_tmp->isConstantStride (), std::logic_error,
4216 "Failed to make temporary constant-stride copies of MultiVectors.");
4219 const LO A_lclNumRows = A_tmp->getLocalLength ();
4220 const LO A_numVecs = A_tmp->getNumVectors ();
4221 auto A_lcl = A_tmp->getLocalViewDevice(Access::ReadOnly);
4222 auto A_sub = Kokkos::subview (A_lcl,
4223 std::make_pair (LO (0), A_lclNumRows),
4224 std::make_pair (LO (0), A_numVecs));
4227 const LO B_lclNumRows = B_tmp->getLocalLength ();
4228 const LO B_numVecs = B_tmp->getNumVectors ();
4229 auto B_lcl = B_tmp->getLocalViewDevice(Access::ReadOnly);
4230 auto B_sub = Kokkos::subview (B_lcl,
4231 std::make_pair (LO (0), B_lclNumRows),
4232 std::make_pair (LO (0), B_numVecs));
4234 const LO C_lclNumRows = C_tmp->getLocalLength ();
4235 const LO C_numVecs = C_tmp->getNumVectors ();
4237 auto C_lcl = C_tmp->getLocalViewDevice(Access::ReadWrite);
4238 auto C_sub = Kokkos::subview (C_lcl,
4239 std::make_pair (LO (0), C_lclNumRows),
4240 std::make_pair (LO (0), C_numVecs));
4241 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4242 (transA == Teuchos::TRANS ?
'T' :
'C'));
4243 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4244 (transB == Teuchos::TRANS ?
'T' :
'C'));
4247 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4249 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4258 A_tmp = Teuchos::null;
4259 B_tmp = Teuchos::null;
4267 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4276 using Kokkos::subview;
4277 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4284 std::runtime_error,
"MultiVectors do not have the same local length.");
4285 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4286 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4287 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4300 KokkosBlas::mult (scalarThis,
4303 subview (A_view, ALL (), 0),
4307 for (
size_t j = 0; j < numVecs; ++j) {
4310 KokkosBlas::mult (scalarThis,
4311 subview (this_view, ALL (), C_col),
4313 subview (A_view, ALL (), 0),
4314 subview (B_view, ALL (), B_col));
4319 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4326 ProfilingRegion region (
"Tpetra::MV::reduce");
4328 const auto map = this->
getMap ();
4329 if (map.get () ==
nullptr) {
4332 const auto comm = map->getComm ();
4333 if (comm.get () ==
nullptr) {
4340 if (changed_on_device) {
4344 Kokkos::fence(
"MultiVector::reduce");
4346 allReduceView (X_lcl, X_lcl, *comm);
4350 allReduceView (X_lcl, X_lcl, *comm);
4354 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4362 const LocalOrdinal minLocalIndex = this->
getMap()->getMinLocalIndex();
4363 const LocalOrdinal maxLocalIndex = this->
getMap()->getMaxLocalIndex();
4364 TEUCHOS_TEST_FOR_EXCEPTION(
4365 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4367 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4368 <<
" is invalid. The range of valid row indices on this process "
4369 << this->
getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4370 <<
", " << maxLocalIndex <<
"].");
4371 TEUCHOS_TEST_FOR_EXCEPTION(
4372 vectorIndexOutOfRange(col),
4374 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4375 <<
" of the multivector is invalid.");
4380 view (lclRow, colInd) = ScalarValue;
4384 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4393 const LocalOrdinal minLocalIndex = this->
getMap()->getMinLocalIndex();
4394 const LocalOrdinal maxLocalIndex = this->
getMap()->getMaxLocalIndex();
4395 TEUCHOS_TEST_FOR_EXCEPTION(
4396 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4398 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4399 <<
" is invalid. The range of valid row indices on this process "
4400 << this->
getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4401 <<
", " << maxLocalIndex <<
"].");
4402 TEUCHOS_TEST_FOR_EXCEPTION(
4403 vectorIndexOutOfRange(col),
4405 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4406 <<
" of the multivector is invalid.");
4413 Kokkos::atomic_add (& (view(lclRow, colInd)), value);
4416 view(lclRow, colInd) += value;
4421 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4430 const LocalOrdinal lclRow = this->
map_->getLocalElement (gblRow);
4433 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4434 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4435 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4437 "Global row index " << gblRow <<
" is not present on this process "
4438 << this->
getMap ()->getComm ()->getRank () <<
".");
4439 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4440 (this->vectorIndexOutOfRange (col), std::runtime_error,
4441 "Vector index " << col <<
" of the MultiVector is invalid.");
4447 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4457 const LocalOrdinal lclRow = this->
map_->getLocalElement (globalRow);
4460 TEUCHOS_TEST_FOR_EXCEPTION(
4461 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4463 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4464 <<
" is not present on this process "
4465 << this->
getMap ()->getComm ()->getRank () <<
".");
4466 TEUCHOS_TEST_FOR_EXCEPTION(
4467 vectorIndexOutOfRange(col),
4469 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4470 <<
" of the multivector is invalid.");
4476 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4478 Teuchos::ArrayRCP<T>
4484 typename dual_view_type::array_layout,
4487 col_dual_view_type X_col =
4488 Kokkos::subview (
view_, Kokkos::ALL (), col);
4489 return Kokkos::Compat::persistingView (X_col.view_device());
4492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4496 return view_.need_sync_host ();
4499 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4503 return view_.need_sync_device ();
4506 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4511 using Teuchos::TypeNameTraits;
4513 std::ostringstream out;
4514 out <<
"\"" << className <<
"\": {";
4515 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4516 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4517 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4518 <<
", Node" << Node::name ()
4520 if (this->getObjectLabel () !=
"") {
4521 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4524 if (className !=
"Tpetra::Vector") {
4526 <<
", isConstantStride: " << this->isConstantStride ();
4528 if (this->isConstantStride ()) {
4529 out <<
", columnStride: " << this->
getStride ();
4536 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4546 template<
typename ViewType>
4547 void print_vector(Teuchos::FancyOStream & out,
const char* prefix,
const ViewType& v)
4550 static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace, typename ViewType::memory_space>::accessible,
4551 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a host-accessible view.");
4552 static_assert(ViewType::rank == 2,
4553 "Tpetra::MultiVector::localDescribeToString: Details::print_vector should be given a rank-2 view.");
4556 out <<
"Values("<<prefix<<
"): " << std::endl
4558 const size_t numRows = v.extent(0);
4559 const size_t numCols = v.extent(1);
4561 for (
size_t i = 0; i < numRows; ++i) {
4563 if (i + 1 < numRows) {
4569 for (
size_t i = 0; i < numRows; ++i) {
4570 for (
size_t j = 0; j < numCols; ++j) {
4572 if (j + 1 < numCols) {
4576 if (i + 1 < numRows) {
4585 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4590 typedef LocalOrdinal LO;
4593 if (vl <= Teuchos::VERB_LOW) {
4594 return std::string ();
4596 auto map = this->
getMap ();
4597 if (map.is_null ()) {
4598 return std::string ();
4600 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4601 auto outp = Teuchos::getFancyOStream (outStringP);
4602 Teuchos::FancyOStream& out = *outp;
4603 auto comm = map->getComm ();
4604 const int myRank = comm->getRank ();
4605 const int numProcs = comm->getSize ();
4607 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4608 Teuchos::OSTab tab1 (out);
4612 out <<
"Local number of rows: " << lclNumRows << endl;
4614 if (vl > Teuchos::VERB_MEDIUM) {
4618 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4620 if (this->isConstantStride ()) {
4621 out <<
"Column stride: " << this->
getStride () << endl;
4624 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4632 auto X_dev =
view_.getDeviceCopy();
4633 auto X_host =
view_.getHostCopy();
4635 if(X_dev.data() == X_host.data()) {
4637 Details::print_vector(out,
"unified",X_host);
4640 Details::print_vector(out,
"host",X_host);
4641 Details::print_vector(out,
"dev",X_dev);
4646 return outStringP->str ();
4649 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4653 const std::string& className,
4654 const Teuchos::EVerbosityLevel verbLevel)
const
4656 using Teuchos::TypeNameTraits;
4657 using Teuchos::VERB_DEFAULT;
4658 using Teuchos::VERB_NONE;
4659 using Teuchos::VERB_LOW;
4661 const Teuchos::EVerbosityLevel vl =
4662 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4664 if (vl == VERB_NONE) {
4671 auto map = this->
getMap ();
4672 if (map.is_null ()) {
4675 auto comm = map->getComm ();
4676 if (comm.is_null ()) {
4680 const int myRank = comm->getRank ();
4689 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4693 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4694 out <<
"\"" << className <<
"\":" << endl;
4695 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4697 out <<
"Template parameters:" << endl;
4698 Teuchos::OSTab tab2 (out);
4699 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4700 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4701 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4702 <<
"Node: " << Node::name () << endl;
4704 if (this->getObjectLabel () !=
"") {
4705 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4708 if (className !=
"Tpetra::Vector") {
4709 out <<
"Number of columns: " << this->
getNumVectors () << endl;
4716 if (vl > VERB_LOW) {
4722 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4725 describe (Teuchos::FancyOStream &out,
4726 const Teuchos::EVerbosityLevel verbLevel)
const
4728 this->
describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4731 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4745 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4747 TEUCHOS_TEST_FOR_EXCEPTION
4749 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4750 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4751 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4752 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4753 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4755 TEUCHOS_TEST_FOR_EXCEPTION
4757 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4758 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4759 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4774 if (src_last_updated_on_host) {
4777 this->isConstantStride (),
4779 this->whichVectors_ (),
4785 this->isConstantStride (),
4787 this->whichVectors_ (),
4792 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4794 Teuchos::RCP<MultiVector<T, LocalOrdinal, GlobalOrdinal, Node> >
4807 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4823 template <
class ST,
class LO,
class GO,
class NT>
4826 std::swap(mv.
map_, this->map_);
4827 std::swap(mv.
view_, this->view_);
4831#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4832 template <
class ST,
class LO,
class GO,
class NT>
4835 const Teuchos::SerialDenseMatrix<int, ST>& src)
4839 using IST =
typename MV::impl_scalar_type;
4840 using input_view_type =
4841 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4842 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4843 using pair_type = std::pair<LO, LO>;
4845 const LO numRows =
static_cast<LO
> (src.numRows ());
4846 const LO numCols =
static_cast<LO
> (src.numCols ());
4848 TEUCHOS_TEST_FOR_EXCEPTION
4851 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4852 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4854 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4856 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4857 input_view_type src_orig (src_raw, src.stride (), numCols);
4858 input_view_type src_view =
4859 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4861 constexpr bool src_isConstantStride =
true;
4862 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4866 src_isConstantStride,
4867 getMultiVectorWhichVectors (dst),
4871 template <
class ST,
class LO,
class GO,
class NT>
4873 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4878 using IST =
typename MV::impl_scalar_type;
4879 using output_view_type =
4880 Kokkos::View<IST**, Kokkos::LayoutLeft,
4881 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4882 using pair_type = std::pair<LO, LO>;
4884 const LO numRows =
static_cast<LO
> (dst.numRows ());
4885 const LO numCols =
static_cast<LO
> (dst.numCols ());
4887 TEUCHOS_TEST_FOR_EXCEPTION
4890 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4891 << src.
getMap ()->getComm ()->getRank () <<
", src is "
4893 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4895 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4896 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4898 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4900 constexpr bool dst_isConstantStride =
true;
4901 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4904 localDeepCopy (dst_view,
4906 dst_isConstantStride,
4909 getMultiVectorWhichVectors (src));
4913 template <
class Scalar,
class LO,
class GO,
class NT>
4914 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4919 return Teuchos::rcp (
new MV (map, numVectors));
4922 template <
class ST,
class LO,
class GO,
class NT>
4940#ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4941# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4942 template class MultiVector< SCALAR , LO , GO , NODE >; \
4943 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4944 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4945 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4946 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4949# define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4950 template class MultiVector< SCALAR , LO , GO , NODE >; \
4951 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4956#define TPETRA_MULTIVECTOR_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
4958 template Teuchos::RCP< MultiVector< SO , LO , GO , NODE > > \
4959 MultiVector< SI , LO , GO , NODE >::convert< SO > () const;
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
All-reduce a 1-D or 2-D Kokkos::View.
Declaration of functions for checking whether a given pointer is accessible from a given Kokkos execu...
Declaration and definition of Tpetra::Details::Blas::fill, an implementation detail of Tpetra::MultiV...
void fill(const ExecutionSpace &execSpace, const ViewType &X, const ValueType &alpha, const IndexType numRows, const IndexType numCols)
Fill the entries of the given 1-D or 2-D Kokkos::View with the given scalar value alpha.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::isInterComm.
Declaration and definition of Tpetra::Details::lclDot, an implementation detail of Tpetra::MultiVecto...
Declaration and definition of Tpetra::Details::normImpl, which is an implementation detail of Tpetra:...
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t multivectorKernelLocationThreshold()
the threshold for transitioning from device to host
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
Kokkos::DualView< size_t *, buffer_device_type > numImportPacketsPerLID_
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Kokkos::DualView< size_t *, buffer_device_type > numExportPacketsPerLID_
Teuchos::RCP< const map_type > map_
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isDistributed() const
Whether this is a globally distributed object.
A parallel distribution of indices over processes.
One or more distributed dense vectors.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,...
void normInf(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a host View.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
size_t getStride() const
Stride between columns in the multivector.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool areRemoteLIDsContiguous=false, const CombineMode CM=INSERT) override
Reallocate imports_ if needed.
void reduce()
Sum values of a locally replicated multivector across all processes.
virtual bool checkSizes(const SrcDistObject &sourceObj) override
Whether data redistribution between sourceObj and this object is legal.
void randomize()
Set all values in the multivector to pseudorandom numbers.
typename map_type::local_ordinal_type local_ordinal_type
The type of local indices that this class uses.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in A and B.
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Implementation of describe() for this class, and its subclass Vector.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Map and their communicator.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &src)
Copy the contents of src into *this (deep copy).
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
std::string descriptionImpl(const std::string &className) const
Implementation of description() for this class, and its subclass Vector.
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
bool need_sync_device() const
Whether this MultiVector needs synchronization to the device.
wrapped_dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
void replaceLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using local (row) index.
void swap(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &mv)
Swap contents of mv with contents of *this.
size_t getLocalLength() const
Local number of rows on the calling process.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
bool need_sync_host() const
Whether this MultiVector needs synchronization to the host.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM, const execution_space &space) override
Same as copyAndPermute, but do operations in space.
void norm2(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the two-norm of each vector (column), storing the result in a host View.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
void replaceGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value)
Replace value in host memory, using global row index.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
size_t getNumVectors() const
Map< LocalOrdinal, GlobalOrdinal, Node > map_type
The type of the Map specialization used by this class.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, device_type > dual_view_type
Kokkos::DualView specialization used by this class.
virtual size_t constantNumberOfPackets() const override
Number of packets to send per LID.
wrapped_dual_view_type getWrappedDualView() const
Return the wrapped dual view holding this MultiVector's local data.
void sumIntoLocalValue(const LocalOrdinal lclRow, const size_t col, const impl_scalar_type &val, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using local row index.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
Teuchos::RCP< MultiVector< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another MultiVector with the same entries, but converted to a different Scalar type T.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Put element-wise absolute values of input Multi-vector in target: A = abs(this).
void norm1(const Kokkos::View< mag_type *, Kokkos::HostSpace > &norms) const
Compute the one-norm of each vector (column), storing the result in a host view.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print the object with the given verbosity level to a FancyOStream.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
void sumIntoGlobalValue(const GlobalOrdinal gblRow, const size_t col, const impl_scalar_type &value, const bool atomic=useAtomicUpdatesByDefault)
Update (+=) a value in host memory, using global row index.
typename Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
bool aliases(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &other) const
Whether this multivector's memory might alias other. This is conservative: if either this or other is...
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
virtual std::string description() const override
A simple one-line description of this object.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
std::string localDescribeToString(const Teuchos::EVerbosityLevel vl) const
Print the calling process' verbose describe() information to the returned string.
bool isSameSize(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &vec) const
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
base_type::impl_scalar_type impl_scalar_type
base_type::dot_type dot_type
base_type::wrapped_dual_view_type wrapped_dual_view_type
Implementation details of Tpetra.
Nonmember function that computes a residual Computes R = B - A * X.
void normImpl(MagnitudeType norms[], const Kokkos::View< const ValueType **, ArrayLayout, DeviceType > &X, const EWhichNorm whichNorm, const Teuchos::ArrayView< const size_t > &whichVecs, const bool isConstantStride, const bool isDistributed, const Teuchos::Comm< int > *comm)
Implementation of MultiVector norms.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
void localDeepCopy(const DstViewType &dst, const SrcViewType &src, const bool dstConstStride, const bool srcConstStride, const DstWhichVecsType &dstWhichVecs, const SrcWhichVecsType &srcWhichVecs)
Implementation of Tpetra::MultiVector deep copy of local data.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
MultiVector< ST, LO, GO, NT > createCopy(const MultiVector< ST, LO, GO, NT > &src)
Return a deep copy of the given MultiVector.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes).
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.