Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockMultiVector_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
11#define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
12
13#include "Tpetra_BlockMultiVector_decl.hpp"
15#include "Tpetra_BlockView.hpp"
16#include "Teuchos_OrdinalTraits.hpp"
17
18
19namespace Tpetra {
20
21template<class Scalar, class LO, class GO, class Node>
28
29template<class Scalar, class LO, class GO, class Node>
33{
34 return mv_;
35}
36
37template<class Scalar, class LO, class GO, class Node>
38Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
39BlockMultiVector<Scalar, LO, GO, Node>::
40getBlockMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
41{
42 typedef BlockMultiVector<Scalar, LO, GO, Node> BMV;
43 const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
44 TEUCHOS_TEST_FOR_EXCEPTION(
45 src_bmv == nullptr, std::invalid_argument, "Tpetra::"
46 "BlockMultiVector: The source object of an Import or Export to a "
47 "BlockMultiVector, must also be a BlockMultiVector.");
48 return Teuchos::rcp (src_bmv, false); // nonowning RCP
49}
50
51template<class Scalar, class LO, class GO, class Node>
54 const Teuchos::DataAccess copyOrView) :
55 dist_object_type (in),
56 meshMap_ (in.meshMap_),
57 pointMap_ (in.pointMap_),
58 mv_ (in.mv_, copyOrView),
59 blockSize_ (in.blockSize_)
60{}
61
62template<class Scalar, class LO, class GO, class Node>
64BlockMultiVector (const map_type& meshMap,
65 const LO blockSize,
66 const LO numVecs) :
67 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
68 meshMap_ (meshMap),
69 pointMap_ (makePointMapRCP (meshMap, blockSize)),
70 mv_ (pointMap_, numVecs), // nonowning RCP is OK, since pointMap_ won't go away
71 blockSize_ (blockSize)
72{}
73
74template<class Scalar, class LO, class GO, class Node>
76BlockMultiVector (const map_type& meshMap,
77 const map_type& pointMap,
78 const LO blockSize,
79 const LO numVecs) :
80 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
81 meshMap_ (meshMap),
82 pointMap_ (new map_type(pointMap)),
83 mv_ (pointMap_, numVecs),
84 blockSize_ (blockSize)
85{}
86
87template<class Scalar, class LO, class GO, class Node>
89BlockMultiVector (const mv_type& X_mv,
90 const map_type& meshMap,
91 const LO blockSize) :
92 dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
93 meshMap_ (meshMap),
94 blockSize_ (blockSize)
95{
96 using Teuchos::RCP;
97
98 if (X_mv.getCopyOrView () == Teuchos::View) {
99 // The input MultiVector has view semantics, so assignment just
100 // does a shallow copy.
101 mv_ = X_mv;
102 }
103 else if (X_mv.getCopyOrView () == Teuchos::Copy) {
104 // The input MultiVector has copy semantics. We can't change
105 // that, but we can make a view of the input MultiVector and
106 // change the view to have view semantics. (That sounds silly;
107 // shouldn't views always have view semantics? but remember that
108 // "view semantics" just governs the default behavior of the copy
109 // constructor and assignment operator.)
110 RCP<const mv_type> X_view_const;
111 const size_t numCols = X_mv.getNumVectors ();
112 if (numCols == 0) {
113 Teuchos::Array<size_t> cols (0); // view will have zero columns
114 X_view_const = X_mv.subView (cols ());
115 } else { // Range1D is an inclusive range
116 X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
117 }
118 TEUCHOS_TEST_FOR_EXCEPTION(
119 X_view_const.is_null (), std::logic_error, "Tpetra::"
120 "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
121 "should never happen. Please report this bug to the Tpetra developers.");
122
123 // It's perfectly OK to cast away const here. Those view methods
124 // should be marked const anyway, because views can't change the
125 // allocation (just the entries themselves).
126 RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
127 TEUCHOS_TEST_FOR_EXCEPTION(
128 X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
129 "BlockMultiVector constructor: We just set a MultiVector "
130 "to have view semantics, but it claims that it doesn't have view "
131 "semantics. This should never happen. "
132 "Please report this bug to the Tpetra developers.");
133 mv_ = *X_view; // MultiVector::operator= does a shallow copy here
134 }
135
136 // At this point, mv_ has been assigned, so we can ignore X_mv.
137 Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
138 pointMap_ = pointMap;
139
140}
141
142template<class Scalar, class LO, class GO, class Node>
145 const map_type& newMeshMap,
146 const map_type& newPointMap,
147 const size_t offset) :
148 dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
149 meshMap_ (newMeshMap),
150 pointMap_ (new map_type(newPointMap)),
151 mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
152 blockSize_ (X.getBlockSize ())
153{}
154
155template<class Scalar, class LO, class GO, class Node>
158 const map_type& newMeshMap,
159 const size_t offset) :
160 dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
161 meshMap_ (newMeshMap),
162 pointMap_ (makePointMapRCP (newMeshMap, X.getBlockSize ())),
163 mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
164 blockSize_ (X.getBlockSize ())
165{}
166
167template<class Scalar, class LO, class GO, class Node>
170 dist_object_type (Teuchos::null),
171 blockSize_ (0)
172{}
173
174template<class Scalar, class LO, class GO, class Node>
177makePointMap (const map_type& meshMap, const LO blockSize)
178{
179typedef Tpetra::global_size_t GST;
180 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
181
182 const GST gblNumMeshMapInds =
183 static_cast<GST> (meshMap.getGlobalNumElements ());
184 const size_t lclNumMeshMapIndices =
185 static_cast<size_t> (meshMap.getLocalNumElements ());
186 const GST gblNumPointMapInds =
187 gblNumMeshMapInds * static_cast<GST> (blockSize);
188 const size_t lclNumPointMapInds =
189 lclNumMeshMapIndices * static_cast<size_t> (blockSize);
190 const GO indexBase = meshMap.getIndexBase ();
191
192 if (meshMap.isContiguous ()) {
193 return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
194 meshMap.getComm ());
195 }
196 else {
197 // "Hilbert's Hotel" trick: multiply each process' GIDs by
198 // blockSize, and fill in. That ensures correctness even if the
199 // mesh Map is overlapping.
200 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
201 const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
202 Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
203 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
204 const GO meshGid = lclMeshGblInds[g];
205 const GO pointGidStart = indexBase +
206 (meshGid - indexBase) * static_cast<GO> (blockSize);
207 const size_type offset = g * static_cast<size_type> (blockSize);
208 for (LO k = 0; k < blockSize; ++k) {
209 const GO pointGid = pointGidStart + static_cast<GO> (k);
210 lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
211 }
212 }
213 return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
214 meshMap.getComm ());
215
216 }
217}
218
219
220template<class Scalar, class LO, class GO, class Node>
221Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::map_type>
223makePointMapRCP (const map_type& meshMap, const LO blockSize)
224{
225typedef Tpetra::global_size_t GST;
226 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
227
228 const GST gblNumMeshMapInds =
229 static_cast<GST> (meshMap.getGlobalNumElements ());
230 const size_t lclNumMeshMapIndices =
231 static_cast<size_t> (meshMap.getLocalNumElements ());
232 const GST gblNumPointMapInds =
233 gblNumMeshMapInds * static_cast<GST> (blockSize);
234 const size_t lclNumPointMapInds =
235 lclNumMeshMapIndices * static_cast<size_t> (blockSize);
236 const GO indexBase = meshMap.getIndexBase ();
237
238 if (meshMap.isContiguous ()) {
239 return Teuchos::rcp(new map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
240 meshMap.getComm ()));
241 }
242 else {
243 // "Hilbert's Hotel" trick: multiply each process' GIDs by
244 // blockSize, and fill in. That ensures correctness even if the
245 // mesh Map is overlapping.
246 Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getLocalElementList ();
247 const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
248 Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
249 for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
250 const GO meshGid = lclMeshGblInds[g];
251 const GO pointGidStart = indexBase +
252 (meshGid - indexBase) * static_cast<GO> (blockSize);
253 const size_type offset = g * static_cast<size_type> (blockSize);
254 for (LO k = 0; k < blockSize; ++k) {
255 const GO pointGid = pointGidStart + static_cast<GO> (k);
256 lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
257 }
258 }
259 return Teuchos::rcp(new map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
260 meshMap.getComm ()));
261
262 }
263}
264
265template<class Scalar, class LO, class GO, class Node>
266void
268replaceLocalValuesImpl (const LO localRowIndex,
269 const LO colIndex,
270 const Scalar vals[])
271{
272 auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
273 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
274 getBlockSize ());
275 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
276 using exec_space = typename device_type::execution_space;
277 Kokkos::deep_copy (exec_space(), X_dst, X_src);
278}
279
280
281template<class Scalar, class LO, class GO, class Node>
282bool
284replaceLocalValues (const LO localRowIndex,
285 const LO colIndex,
286 const Scalar vals[])
287{
288 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
289 return false;
290 } else {
291 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
292 return true;
293 }
294}
295
296template<class Scalar, class LO, class GO, class Node>
297bool
299replaceGlobalValues (const GO globalRowIndex,
300 const LO colIndex,
301 const Scalar vals[])
302{
303 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
304 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
305 return false;
306 } else {
307 replaceLocalValuesImpl (localRowIndex, colIndex, vals);
308 return true;
309 }
310}
311
312template<class Scalar, class LO, class GO, class Node>
313void
315sumIntoLocalValuesImpl (const LO localRowIndex,
316 const LO colIndex,
317 const Scalar vals[])
318{
319 auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
320 typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
321 getBlockSize ());
322 AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
323}
324
325template<class Scalar, class LO, class GO, class Node>
326bool
328sumIntoLocalValues (const LO localRowIndex,
329 const LO colIndex,
330 const Scalar vals[])
331{
332 if (! meshMap_.isNodeLocalElement (localRowIndex)) {
333 return false;
334 } else {
335 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
336 return true;
337 }
338}
339
340template<class Scalar, class LO, class GO, class Node>
341bool
343sumIntoGlobalValues (const GO globalRowIndex,
344 const LO colIndex,
345 const Scalar vals[])
346{
347 const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
348 if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
349 return false;
350 } else {
351 sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
352 return true;
353 }
354}
355
356
357template<class Scalar, class LO, class GO, class Node>
358typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
360getLocalBlockHost (const LO localRowIndex,
361 const LO colIndex,
362 const Access::ReadOnlyStruct) const
363{
364 if (!isValidLocalMeshIndex(localRowIndex)) {
365 return const_little_host_vec_type();
366 } else {
367 const size_t blockSize = getBlockSize();
368 auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
369 LO startRow = localRowIndex*blockSize;
370 LO endRow = startRow + blockSize;
371 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
372 colIndex);
373 }
374}
375
376template<class Scalar, class LO, class GO, class Node>
377typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
379getLocalBlockHost (const LO localRowIndex,
380 const LO colIndex,
381 const Access::OverwriteAllStruct)
382{
383 if (!isValidLocalMeshIndex(localRowIndex)) {
384 return little_host_vec_type();
385 } else {
386 const size_t blockSize = getBlockSize();
387 auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
388 LO startRow = localRowIndex*blockSize;
389 LO endRow = startRow + blockSize;
390 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
391 colIndex);
392 }
393}
394
395template<class Scalar, class LO, class GO, class Node>
396typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
398getLocalBlockHost (const LO localRowIndex,
399 const LO colIndex,
400 const Access::ReadWriteStruct)
401{
402 if (!isValidLocalMeshIndex(localRowIndex)) {
403 return little_host_vec_type();
404 } else {
405 const size_t blockSize = getBlockSize();
406 auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
407 LO startRow = localRowIndex*blockSize;
408 LO endRow = startRow + blockSize;
409 return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
410 colIndex);
411 }
412}
413
414template<class Scalar, class LO, class GO, class Node>
415Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
417getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
418{
419 using Teuchos::rcp;
420 using Teuchos::rcpFromRef;
421
422 // The source object of an Import or Export must be a
423 // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
424 // them in that order; one must succeed. Note that the target of
425 // the Import or Export calls checkSizes in DistObject's doTransfer.
426 typedef BlockMultiVector<Scalar, LO, GO, Node> this_BMV_type;
427 const this_BMV_type* srcBlkVec = dynamic_cast<const this_BMV_type*> (&src);
428 if (srcBlkVec == nullptr) {
429 const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
430 if (srcMultiVec == nullptr) {
431 // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
432 // currently does a shallow copy by default. This is why we
433 // return by RCP, rather than by value.
434 return rcp (new mv_type ());
435 } else { // src is a MultiVector
436 return rcp (srcMultiVec, false); // nonowning RCP
437 }
438 } else { // src is a BlockMultiVector
439 return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
440 }
441}
442
443template<class Scalar, class LO, class GO, class Node>
446{
447 return ! getMultiVectorFromSrcDistObject (src).is_null ();
448}
449
450template<class Scalar, class LO, class GO, class Node>
453(const SrcDistObject& src,
454 const size_t numSameIDs,
455 const Kokkos::DualView<const local_ordinal_type*,
456 buffer_device_type>& permuteToLIDs,
457 const Kokkos::DualView<const local_ordinal_type*,
458 buffer_device_type>& permuteFromLIDs,
459 const CombineMode CM)
460{
461 TEUCHOS_TEST_FOR_EXCEPTION
462 (true, std::logic_error,
463 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
464 "instead, create a point importer using makePointMap function.");
465}
466
467template<class Scalar, class LO, class GO, class Node>
470(const SrcDistObject& src,
471 const Kokkos::DualView<const local_ordinal_type*,
472 buffer_device_type>& exportLIDs,
473 Kokkos::DualView<packet_type*,
474 buffer_device_type>& exports,
475 Kokkos::DualView<size_t*,
476 buffer_device_type> numPacketsPerLID,
477 size_t& constantNumPackets)
478{
479 TEUCHOS_TEST_FOR_EXCEPTION
480 (true, std::logic_error,
481 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
482 "instead, create a point importer using makePointMap function.");
483}
484
485template<class Scalar, class LO, class GO, class Node>
488(const Kokkos::DualView<const local_ordinal_type*,
489 buffer_device_type>& importLIDs,
490 Kokkos::DualView<packet_type*,
491 buffer_device_type> imports,
492 Kokkos::DualView<size_t*,
493 buffer_device_type> numPacketsPerLID,
494 const size_t constantNumPackets,
495 const CombineMode combineMode)
496{
497 TEUCHOS_TEST_FOR_EXCEPTION
498 (true, std::logic_error,
499 "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
500 "instead, create a point importer using makePointMap function.");
501}
502
503template<class Scalar, class LO, class GO, class Node>
505isValidLocalMeshIndex (const LO meshLocalIndex) const
506{
507 return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
508 meshMap_.isNodeLocalElement (meshLocalIndex);
509}
510
511template<class Scalar, class LO, class GO, class Node>
513putScalar (const Scalar& val)
514{
515 mv_.putScalar (val);
516}
517
518template<class Scalar, class LO, class GO, class Node>
520scale (const Scalar& val)
521{
522 mv_.scale (val);
523}
524
525template<class Scalar, class LO, class GO, class Node>
527update (const Scalar& alpha,
529 const Scalar& beta)
530{
531 mv_.update (alpha, X.mv_, beta);
532}
533
534namespace Impl {
535// Y := alpha * D * X
536template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
537struct BlockWiseMultiply {
538 typedef typename ViewD::size_type Size;
539
540private:
541 typedef typename ViewD::device_type Device;
542 typedef typename ViewD::non_const_value_type ImplScalar;
543 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
544
545 template <typename View>
546 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
547 typename View::device_type, Unmanaged>;
548 typedef UnmanagedView<ViewY> UnMViewY;
549 typedef UnmanagedView<ViewD> UnMViewD;
550 typedef UnmanagedView<ViewX> UnMViewX;
551
552 const Size block_size_;
553 Scalar alpha_;
554 UnMViewY Y_;
555 UnMViewD D_;
556 UnMViewX X_;
557
558public:
559 BlockWiseMultiply (const Size block_size, const Scalar& alpha,
560 const ViewY& Y, const ViewD& D, const ViewX& X)
561 : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
562 {}
563
564 KOKKOS_INLINE_FUNCTION
565 void operator() (const Size k) const {
566 const auto zero = Kokkos::ArithTraits<Scalar>::zero();
567 auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
568 const auto num_vecs = X_.extent(1);
569 for (Size i = 0; i < num_vecs; ++i) {
570 Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
571 auto X_curBlk = Kokkos::subview(X_, kslice, i);
572 auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
573 // Y_curBlk := alpha * D_curBlk * X_curBlk.
574 // Recall that GEMV does an update (+=) of the last argument.
575 Tpetra::FILL(Y_curBlk, zero);
576 Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
577 }
578 }
579};
580
581template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
582inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
583createBlockWiseMultiply (const int block_size, const Scalar& alpha,
584 const ViewY& Y, const ViewD& D, const ViewX& X) {
585 return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
586}
587
588template <typename ViewY,
589 typename Scalar,
590 typename ViewD,
591 typename ViewZ,
592 typename LO = typename ViewY::size_type>
593class BlockJacobiUpdate {
594private:
595 typedef typename ViewD::device_type Device;
596 typedef typename ViewD::non_const_value_type ImplScalar;
597 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
598
599 template <typename ViewType>
600 using UnmanagedView = Kokkos::View<typename ViewType::data_type,
601 typename ViewType::array_layout,
602 typename ViewType::device_type,
603 Unmanaged>;
604 typedef UnmanagedView<ViewY> UnMViewY;
605 typedef UnmanagedView<ViewD> UnMViewD;
606 typedef UnmanagedView<ViewZ> UnMViewZ;
607
608 const LO blockSize_;
609 UnMViewY Y_;
610 const Scalar alpha_;
611 UnMViewD D_;
612 UnMViewZ Z_;
613 const Scalar beta_;
614
615public:
616 BlockJacobiUpdate (const ViewY& Y,
617 const Scalar& alpha,
618 const ViewD& D,
619 const ViewZ& Z,
620 const Scalar& beta) :
621 blockSize_ (D.extent (1)),
622 // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
623 Y_ (Y),
624 alpha_ (alpha),
625 D_ (D),
626 Z_ (Z),
627 beta_ (beta)
628 {
629 static_assert (static_cast<int> (ViewY::rank) == 1,
630 "Y must have rank 1.");
631 static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
632 static_assert (static_cast<int> (ViewZ::rank) == 1,
633 "Z must have rank 1.");
634 // static_assert (static_cast<int> (ViewZ::rank) ==
635 // static_cast<int> (ViewY::rank),
636 // "Z must have the same rank as Y.");
637 }
638
639 KOKKOS_INLINE_FUNCTION void
640 operator() (const LO& k) const
641 {
642 using Kokkos::ALL;
643 using Kokkos::subview;
644 typedef Kokkos::pair<LO, LO> range_type;
645 typedef Kokkos::ArithTraits<Scalar> KAT;
646
647 // We only have to implement the alpha != 0 case.
648
649 auto D_curBlk = subview (D_, k, ALL (), ALL ());
650 const range_type kslice (k*blockSize_, (k+1)*blockSize_);
651
652 // Z.update (STS::one (), X, -STS::one ()); // assume this is done
653
654 auto Z_curBlk = subview (Z_, kslice);
655 auto Y_curBlk = subview (Y_, kslice);
656 // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
657 if (beta_ == KAT::zero ()) {
658 Tpetra::FILL (Y_curBlk, KAT::zero ());
659 }
660 else if (beta_ != KAT::one ()) {
661 Tpetra::SCAL (beta_, Y_curBlk);
662 }
663 Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
664 }
665};
666
667template<class ViewY,
668 class Scalar,
669 class ViewD,
670 class ViewZ,
671 class LO = typename ViewD::size_type>
672void
673blockJacobiUpdate (const ViewY& Y,
674 const Scalar& alpha,
675 const ViewD& D,
676 const ViewZ& Z,
677 const Scalar& beta)
678{
679 static_assert (Kokkos::is_view<ViewY>::value, "Y must be a Kokkos::View.");
680 static_assert (Kokkos::is_view<ViewD>::value, "D must be a Kokkos::View.");
681 static_assert (Kokkos::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
682 static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
683 "Y and Z must have the same rank.");
684 static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
685
686 const auto lclNumMeshRows = D.extent (0);
687
688#ifdef HAVE_TPETRA_DEBUG
689 // D.extent(0) is the (local) number of mesh rows.
690 // D.extent(1) is the block size. Thus, their product should be
691 // the local number of point rows, that is, the number of rows in Y.
692 const auto blkSize = D.extent (1);
693 const auto lclNumPtRows = lclNumMeshRows * blkSize;
694 TEUCHOS_TEST_FOR_EXCEPTION
695 (Y.extent (0) != lclNumPtRows, std::invalid_argument,
696 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
697 "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
698 << " = " << lclNumPtRows << ".");
699 TEUCHOS_TEST_FOR_EXCEPTION
700 (Y.extent (0) != Z.extent (0), std::invalid_argument,
701 "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
702 "Z.extent(0) = " << Z.extent (0) << ".");
703 TEUCHOS_TEST_FOR_EXCEPTION
704 (Y.extent (1) != Z.extent (1), std::invalid_argument,
705 "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
706 "Z.extent(1) = " << Z.extent (1) << ".");
707#endif // HAVE_TPETRA_DEBUG
708
709 BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
710 typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
711 // lclNumMeshRows must fit in LO, else the Map would not be correct.
712 range_type range (0, static_cast<LO> (lclNumMeshRows));
713 Kokkos::parallel_for (range, functor);
714}
715
716} // namespace Impl
717
718template<class Scalar, class LO, class GO, class Node>
720blockWiseMultiply (const Scalar& alpha,
721 const Kokkos::View<const impl_scalar_type***,
722 device_type, Kokkos::MemoryUnmanaged>& D,
724{
725 using Kokkos::ALL;
726 typedef typename device_type::execution_space exec_space;
727 const LO lclNumMeshRows = meshMap_.getLocalNumElements ();
728
729 if (alpha == STS::zero ()) {
730 this->putScalar (STS::zero ());
731 }
732 else { // alpha != 0
733 const LO blockSize = this->getBlockSize ();
734 const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
735 auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
736 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
737 auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
738
739 // Use an explicit RangePolicy with the desired execution space.
740 // Otherwise, if you just use a number, it runs on the default
741 // execution space. For example, if the default execution space
742 // is Cuda but the current execution space is Serial, using just a
743 // number would incorrectly run with Cuda.
744 Kokkos::RangePolicy<exec_space, LO> range (0, lclNumMeshRows);
745 Kokkos::parallel_for (range, bwm);
746 }
747}
748
749template<class Scalar, class LO, class GO, class Node>
751blockJacobiUpdate (const Scalar& alpha,
752 const Kokkos::View<const impl_scalar_type***,
753 device_type, Kokkos::MemoryUnmanaged>& D,
756 const Scalar& beta)
757{
758 using Kokkos::ALL;
759 using Kokkos::subview;
760 typedef impl_scalar_type IST;
761
762 const IST alphaImpl = static_cast<IST> (alpha);
763 const IST betaImpl = static_cast<IST> (beta);
764 const LO numVecs = mv_.getNumVectors ();
765
766 if (alpha == STS::zero ()) { // Y := beta * Y
767 this->scale (beta);
768 }
769 else { // alpha != 0
770 Z.update (STS::one (), X, -STS::one ());
771 for (LO j = 0; j < numVecs; ++j) {
772 auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
773 auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
774 auto Y_lcl_j = subview (Y_lcl, ALL (), j);
775 auto Z_lcl_j = subview (Z_lcl, ALL (), j);
776 Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
777 }
778 }
779}
780
781} // namespace Tpetra
782
783//
784// Explicit instantiation macro
785//
786// Must be expanded from within the Tpetra namespace!
787//
788#define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
789 template class BlockMultiVector< S, LO, GO, NODE >;
790
791#endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
const mv_type & getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
static Teuchos::RCP< const map_type > makePointMapRCP(const map_type &meshMap, const LO blockSize)
Create and return an owning RCP to the point Map corresponding to the given mesh Map and block size.
map_type meshMap_
Mesh Map given to constructor.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization used for communication buffers.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
Tpetra::Map< LO, GO, Node > map_type
The specialization of Tpetra::Map that this class uses.
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
LO local_ordinal_type
The type of local indices.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace for new Tpetra features that are not ready for public release, but are ready for evaluation...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
CombineMode
Rule for combining data in an Import or Export.