Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// clang-format off
11#ifndef TPETRA_DISTOBJECT_DEF_HPP
12#define TPETRA_DISTOBJECT_DEF_HPP
13
21
22#include "Tpetra_Distributor.hpp"
25#include "Tpetra_Details_checkGlobalError.hpp"
27#include "Tpetra_Util.hpp" // Details::createPrefix
28#include "Teuchos_CommHelpers.hpp"
29#include "Teuchos_TypeNameTraits.hpp"
30#include <typeinfo>
31#include <memory>
32#include <sstream>
33
34namespace Tpetra {
35
36 namespace { // (anonymous)
37 template<class DeviceType, class IndexType = size_t>
38 struct SumFunctor {
39 SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
40 viewToSum_ (viewToSum) {}
41 KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
42 lclSum += viewToSum_(i);
43 }
44 Kokkos::View<const size_t*, DeviceType> viewToSum_;
45 };
46
47 template<class DeviceType, class IndexType = size_t>
48 size_t
49 countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
50 {
51 using Kokkos::parallel_reduce;
52 typedef DeviceType DT;
53 typedef typename DT::execution_space DES;
54 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
55
56 const IndexType numOut = numImportPacketsPerLID.extent (0);
57 size_t totalImportPackets = 0;
58 parallel_reduce ("Count import packets",
59 range_type (0, numOut),
60 SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
61 totalImportPackets);
62 return totalImportPackets;
63 }
64 } // namespace (anonymous)
65
66
67 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
69 DistObject (const Teuchos::RCP<const map_type>& map) :
70 map_ (map)
71 {
72#ifdef HAVE_TPETRA_TRANSFER_TIMERS
73 using Teuchos::RCP;
74 using Teuchos::Time;
75 using Teuchos::TimeMonitor;
76
77 RCP<Time> doXferTimer =
78 TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
79 if (doXferTimer.is_null ()) {
80 doXferTimer =
81 TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
82 }
83 doXferTimer_ = doXferTimer;
84
85 RCP<Time> copyAndPermuteTimer =
86 TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
87 if (copyAndPermuteTimer.is_null ()) {
88 copyAndPermuteTimer =
89 TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
90 }
91 copyAndPermuteTimer_ = copyAndPermuteTimer;
92
93 RCP<Time> packAndPrepareTimer =
94 TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
95 if (packAndPrepareTimer.is_null ()) {
96 packAndPrepareTimer =
97 TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
98 }
99 packAndPrepareTimer_ = packAndPrepareTimer;
100
101 RCP<Time> doPostsAndWaitsTimer =
102 TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
103 if (doPostsAndWaitsTimer.is_null ()) {
104 doPostsAndWaitsTimer =
105 TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
106 }
107 doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
108
109 RCP<Time> unpackAndCombineTimer =
110 TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
111 if (unpackAndCombineTimer.is_null ()) {
112 unpackAndCombineTimer =
113 TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
114 }
115 unpackAndCombineTimer_ = unpackAndCombineTimer;
116#endif // HAVE_TPETRA_TRANSFER_TIMERS
117 }
118
119 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
120 std::string
122 description () const
123 {
124 using Teuchos::TypeNameTraits;
125
126 std::ostringstream os;
127 os << "\"Tpetra::DistObject\": {"
128 << "Packet: " << TypeNameTraits<packet_type>::name ()
129 << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
130 << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
131 << ", Node: " << TypeNameTraits<Node>::name ();
132 if (this->getObjectLabel () != "") {
133 os << "Label: \"" << this->getObjectLabel () << "\"";
134 }
135 os << "}";
136 return os.str ();
137 }
138
139 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
140 void
142 describe (Teuchos::FancyOStream &out,
143 const Teuchos::EVerbosityLevel verbLevel) const
144 {
145 using Teuchos::rcpFromRef;
146 using Teuchos::TypeNameTraits;
147 using std::endl;
148 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
149 Teuchos::VERB_LOW : verbLevel;
150 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
151 const int myRank = comm.is_null () ? 0 : comm->getRank ();
152 const int numProcs = comm.is_null () ? 1 : comm->getSize ();
153
154 if (vl != Teuchos::VERB_NONE) {
155 Teuchos::OSTab tab0 (out);
156 if (myRank == 0) {
157 out << "\"Tpetra::DistObject\":" << endl;
158 }
159 Teuchos::OSTab tab1 (out);
160 if (myRank == 0) {
161 out << "Template parameters:" << endl;
162 {
163 Teuchos::OSTab tab2 (out);
164 out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
165 << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
166 << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
167 << "Node: " << TypeNameTraits<node_type>::name () << endl;
168 }
169 if (this->getObjectLabel () != "") {
170 out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
171 }
172 } // if myRank == 0
173
174 // Describe the Map.
175 {
176 if (myRank == 0) {
177 out << "Map:" << endl;
178 }
179 Teuchos::OSTab tab2 (out);
180 map_->describe (out, vl);
181 }
182
183 // At verbosity > VERB_LOW, each process prints something.
184 if (vl > Teuchos::VERB_LOW) {
185 for (int p = 0; p < numProcs; ++p) {
186 if (myRank == p) {
187 out << "Process " << myRank << ":" << endl;
188 Teuchos::OSTab tab2 (out);
189 out << "Export buffer size (in packets): "
190 << exports_.extent (0)
191 << endl
192 << "Import buffer size (in packets): "
193 << imports_.extent (0)
194 << endl;
195 }
196 if (! comm.is_null ()) {
197 comm->barrier (); // give output time to finish
198 comm->barrier ();
199 comm->barrier ();
200 }
201 } // for each process rank p
202 } // if vl > VERB_LOW
203 } // if vl != VERB_NONE
204 }
205
206 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
207 void
209 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
210 {
211 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
212 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
213 }
214
215 /* These are provided in base DistObject template
216 template<class DistObjectType>
217 void
218 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
219 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
220 typename DistObjectType::global_ordinal_type,
221 typename DistObjectType::node_type> >& newMap)
222 {
223 input->removeEmptyProcessesInPlace (newMap);
224 if (newMap.is_null ()) { // my process is excluded
225 input = Teuchos::null;
226 }
227 }
228
229 template<class DistObjectType>
230 void
231 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
232 {
233 using Teuchos::RCP;
234 typedef typename DistObjectType::local_ordinal_type LO;
235 typedef typename DistObjectType::global_ordinal_type GO;
236 typedef typename DistObjectType::node_type NT;
237 typedef Map<LO, GO, NT> map_type;
238
239 RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
240 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
241 }
242 */
243
244 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
245 void
247 doImport (const SrcDistObject& source,
249 const CombineMode CM,
250 const bool restrictedMode)
251 {
252 using Details::Behavior;
253 using std::endl;
254 const char modeString[] = "doImport (forward mode)";
255
256 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
257 // output to std::cerr on every MPI process. This is unwise for
258 // runs with large numbers of MPI processes.
259 const bool verbose = Behavior::verbose("DistObject");
260 std::unique_ptr<std::string> prefix;
261 if (verbose) {
262 prefix = this->createPrefix("DistObject", modeString);
263 std::ostringstream os;
264 os << *prefix << "Start" << endl;
265 std::cerr << os.str ();
266 }
267 this->beginImport(source, importer, CM, restrictedMode);
268 this->endImport(source, importer, CM, restrictedMode);
269 if (verbose) {
270 std::ostringstream os;
271 os << *prefix << "Done" << endl;
272 std::cerr << os.str ();
273 }
274 }
275
276 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
277 void
279 doExport (const SrcDistObject& source,
281 const CombineMode CM,
282 const bool restrictedMode)
283 {
284 using Details::Behavior;
285 using std::endl;
286 const char modeString[] = "doExport (forward mode)";
287
288 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
289 // output to std::cerr on every MPI process. This is unwise for
290 // runs with large numbers of MPI processes.
291 const bool verbose = Behavior::verbose("DistObject");
292 std::unique_ptr<std::string> prefix;
293 if (verbose) {
294 prefix = this->createPrefix("DistObject", modeString);
295 std::ostringstream os;
296 os << *prefix << "Start" << endl;
297 std::cerr << os.str ();
298 }
299 this->beginExport(source, exporter, CM, restrictedMode);
300 this->endExport(source, exporter, CM, restrictedMode);
301 if (verbose) {
302 std::ostringstream os;
303 os << *prefix << "Done" << endl;
304 std::cerr << os.str ();
305 }
306 }
307
308 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
309 void
311 doImport (const SrcDistObject& source,
313 const CombineMode CM,
314 const bool restrictedMode)
315 {
316 using Details::Behavior;
317 using std::endl;
318 const char modeString[] = "doImport (reverse mode)";
319
320 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
321 // output to std::cerr on every MPI process. This is unwise for
322 // runs with large numbers of MPI processes.
323 const bool verbose = Behavior::verbose("DistObject");
324 std::unique_ptr<std::string> prefix;
325 if (verbose) {
326 prefix = this->createPrefix("DistObject", modeString);
327 std::ostringstream os;
328 os << *prefix << "Start" << endl;
329 std::cerr << os.str ();
330 }
331 this->beginImport(source, exporter, CM, restrictedMode);
332 this->endImport(source, exporter, CM, restrictedMode);
333 if (verbose) {
334 std::ostringstream os;
335 os << *prefix << "Done" << endl;
336 std::cerr << os.str ();
337 }
338 }
339
340 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
341 void
343 doExport (const SrcDistObject& source,
345 const CombineMode CM,
346 const bool restrictedMode)
347 {
348 using Details::Behavior;
349 using std::endl;
350 const char modeString[] = "doExport (reverse mode)";
351
352 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
353 // output to std::cerr on every MPI process. This is unwise for
354 // runs with large numbers of MPI processes.
355 const bool verbose = Behavior::verbose("DistObject");
356 std::unique_ptr<std::string> prefix;
357 if (verbose) {
358 prefix = this->createPrefix("DistObject", modeString);
359 std::ostringstream os;
360 os << *prefix << "Start" << endl;
361 std::cerr << os.str ();
362 }
363 this->beginExport(source, importer, CM, restrictedMode);
364 this->endExport(source, importer, CM, restrictedMode);
365 if (verbose) {
366 std::ostringstream os;
367 os << *prefix << "Done" << endl;
368 std::cerr << os.str ();
369 }
370 }
371
372 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
373 void
375 beginImport(const SrcDistObject& source,
377 const CombineMode CM,
378 const bool restrictedMode)
379 {
380 using Details::Behavior;
381 using std::endl;
382 const char modeString[] = "beginImport (forward mode)";
383
384 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
385 // output to std::cerr on every MPI process. This is unwise for
386 // runs with large numbers of MPI processes.
387 const bool verbose = Behavior::verbose("DistObject");
388 std::unique_ptr<std::string> prefix;
389 if (verbose) {
390 prefix = this->createPrefix("DistObject", modeString);
391 std::ostringstream os;
392 os << *prefix << "Start" << endl;
393 std::cerr << os.str ();
394 }
395 this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
396 if (verbose) {
397 std::ostringstream os;
398 os << *prefix << "Done" << endl;
399 std::cerr << os.str ();
400 }
401 }
402
403 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
404 void
406 beginExport(const SrcDistObject& source,
408 const CombineMode CM,
409 const bool restrictedMode)
410 {
411 using Details::Behavior;
412 using std::endl;
413 const char modeString[] = "beginExport (forward mode)";
414
415 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
416 // output to std::cerr on every MPI process. This is unwise for
417 // runs with large numbers of MPI processes.
418 const bool verbose = Behavior::verbose("DistObject");
419 std::unique_ptr<std::string> prefix;
420 if (verbose) {
421 prefix = this->createPrefix("DistObject", modeString);
422 std::ostringstream os;
423 os << *prefix << "Start" << endl;
424 std::cerr << os.str ();
425 }
426 this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
427 if (verbose) {
428 std::ostringstream os;
429 os << *prefix << "Done" << endl;
430 std::cerr << os.str ();
431 }
432 }
433
434 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
435 void
437 beginImport(const SrcDistObject& source,
439 const CombineMode CM,
440 const bool restrictedMode)
441 {
442 using Details::Behavior;
443 using std::endl;
444 const char modeString[] = "beginImport (reverse mode)";
445
446 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
447 // output to std::cerr on every MPI process. This is unwise for
448 // runs with large numbers of MPI processes.
449 const bool verbose = Behavior::verbose("DistObject");
450 std::unique_ptr<std::string> prefix;
451 if (verbose) {
452 prefix = this->createPrefix("DistObject", modeString);
453 std::ostringstream os;
454 os << *prefix << "Start" << endl;
455 std::cerr << os.str ();
456 }
457 this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
458 if (verbose) {
459 std::ostringstream os;
460 os << *prefix << "Done" << endl;
461 std::cerr << os.str ();
462 }
463 }
464
465 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
466 void
468 beginExport(const SrcDistObject& source,
470 const CombineMode CM,
471 const bool restrictedMode)
472 {
473 using Details::Behavior;
474 using std::endl;
475 const char modeString[] = "beginExport (reverse mode)";
476
477 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
478 // output to std::cerr on every MPI process. This is unwise for
479 // runs with large numbers of MPI processes.
480 const bool verbose = Behavior::verbose("DistObject");
481 std::unique_ptr<std::string> prefix;
482 if (verbose) {
483 prefix = this->createPrefix("DistObject", modeString);
484 std::ostringstream os;
485 os << *prefix << "Start" << endl;
486 std::cerr << os.str ();
487 }
488 this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
489 if (verbose) {
490 std::ostringstream os;
491 os << *prefix << "Done" << endl;
492 std::cerr << os.str ();
493 }
494 }
495
496 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
497 void
499 endImport(const SrcDistObject& source,
501 const CombineMode CM,
502 const bool restrictedMode)
503 {
504 using Details::Behavior;
505 using std::endl;
506 const char modeString[] = "endImport (forward mode)";
507
508 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
509 // output to std::cerr on every MPI process. This is unwise for
510 // runs with large numbers of MPI processes.
511 const bool verbose = Behavior::verbose("DistObject");
512 std::unique_ptr<std::string> prefix;
513 if (verbose) {
514 prefix = this->createPrefix("DistObject", modeString);
515 std::ostringstream os;
516 os << *prefix << "Start" << endl;
517 std::cerr << os.str ();
518 }
519 this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
520 if (verbose) {
521 std::ostringstream os;
522 os << *prefix << "Done" << endl;
523 std::cerr << os.str ();
524 }
525 }
526
527 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
528 void
530 endExport(const SrcDistObject& source,
532 const CombineMode CM,
533 const bool restrictedMode)
534 {
535 using Details::Behavior;
536 using std::endl;
537 const char modeString[] = "endExport (forward mode)";
538
539 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
540 // output to std::cerr on every MPI process. This is unwise for
541 // runs with large numbers of MPI processes.
542 const bool verbose = Behavior::verbose("DistObject");
543 std::unique_ptr<std::string> prefix;
544 if (verbose) {
545 prefix = this->createPrefix("DistObject", modeString);
546 std::ostringstream os;
547 os << *prefix << "Start" << endl;
548 std::cerr << os.str ();
549 }
550 this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
551 if (verbose) {
552 std::ostringstream os;
553 os << *prefix << "Done" << endl;
554 std::cerr << os.str ();
555 }
556 }
557
558 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
559 void
560 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
561 endImport(const SrcDistObject& source,
562 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
563 const CombineMode CM,
564 const bool restrictedMode)
565 {
566 using Details::Behavior;
567 using std::endl;
568 const char modeString[] = "endImport (reverse mode)";
570 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
571 // output to std::cerr on every MPI process. This is unwise for
572 // runs with large numbers of MPI processes.
573 const bool verbose = Behavior::verbose("DistObject");
574 std::unique_ptr<std::string> prefix;
575 if (verbose) {
576 prefix = this->createPrefix("DistObject", modeString);
577 std::ostringstream os;
578 os << *prefix << "Start" << endl;
579 std::cerr << os.str ();
580 }
581 this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
582 if (verbose) {
583 std::ostringstream os;
584 os << *prefix << "Done" << endl;
585 std::cerr << os.str ();
587 }
588
589 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
590 void
591 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
592 endExport(const SrcDistObject& source,
593 const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
594 const CombineMode CM,
595 const bool restrictedMode)
596 {
597 using Details::Behavior;
598 using std::endl;
599 const char modeString[] = "endExport (reverse mode)";
600
601 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
602 // output to std::cerr on every MPI process. This is unwise for
603 // runs with large numbers of MPI processes.
604 const bool verbose = Behavior::verbose("DistObject");
605 std::unique_ptr<std::string> prefix;
606 if (verbose) {
607 prefix = this->createPrefix("DistObject", modeString);
608 std::ostringstream os;
609 os << *prefix << "Start" << endl;
610 std::cerr << os.str ();
611 }
612 this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
613 if (verbose) {
614 std::ostringstream os;
615 os << *prefix << "Done" << endl;
616 std::cerr << os.str ();
617 }
618 }
619
620 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
621 bool
623 transferArrived() const {
624 return distributorActor_.isReady();
625 }
626
627 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
628 bool
630 isDistributed () const {
631 return map_->isDistributed ();
632 }
633
634 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
635 size_t
638 return 0; // default implementation; subclasses may override
639 }
640
641 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
642 void
644 doTransfer (const SrcDistObject& src,
645 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
646 const char modeString[],
647 const ReverseOption revOp,
648 const CombineMode CM,
649 bool restrictedMode)
650 {
651 beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
652 endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
653 }
654
655 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
656 bool
658 reallocImportsIfNeeded (const size_t newSize,
659 const bool verbose,
660 const std::string* prefix,
661 const bool /*remoteLIDsContiguous*/,
662 const CombineMode /*CM*/)
663 {
664 if (verbose) {
665 std::ostringstream os;
666 os << *prefix << "Realloc (if needed) imports_ from "
667 << imports_.extent (0) << " to " << newSize << std::endl;
668 std::cerr << os.str ();
669 }
671 const bool reallocated =
672 reallocDualViewIfNeeded (this->imports_, newSize, "imports");
673 if (verbose) {
674 std::ostringstream os;
675 os << *prefix << "Finished realloc'ing imports_" << std::endl;
676 std::cerr << os.str ();
677 }
678 return reallocated;
679 }
680
681 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
682 bool
684 reallocArraysForNumPacketsPerLid (const size_t numExportLIDs,
685 const size_t numImportLIDs)
686 {
687 using Details::Behavior;
690 using std::endl;
691 // If an array is already allocated, and if is at least
692 // tooBigFactor times bigger than it needs to be, free it and
693 // reallocate to the size we need, in order to save space.
694 // Otherwise, take subviews to reduce allocation size.
695 constexpr size_t tooBigFactor = 10;
696
697 const bool verbose = Behavior::verbose("DistObject");
698 std::unique_ptr<std::string> prefix;
699 if (verbose) {
700 prefix = this->createPrefix("DistObject",
701 "reallocArraysForNumPacketsPerLid");
702 std::ostringstream os;
703 os << *prefix
704 << "numExportLIDs: " << numExportLIDs
705 << ", numImportLIDs: " << numImportLIDs
706 << endl;
707 os << *prefix << "DualView status before:" << endl
708 << *prefix
709 << dualViewStatusToString (this->numExportPacketsPerLID_,
710 "numExportPacketsPerLID_")
711 << endl
712 << *prefix
713 << dualViewStatusToString (this->numImportPacketsPerLID_,
714 "numImportPacketsPerLID_")
715 << endl;
716 std::cerr << os.str ();
717 }
718
719 // Reallocate numExportPacketsPerLID_ if needed.
720 const bool firstReallocated =
721 reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
722 numExportLIDs,
723 "numExportPacketsPerLID",
724 tooBigFactor,
725 true); // need fence before, if realloc'ing
726
727 // If we reallocated above, then we fenced after that
728 // reallocation. This means that we don't need to fence again,
729 // before the next reallocation.
730 const bool needFenceBeforeNextAlloc = ! firstReallocated;
731 const bool secondReallocated =
732 reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
733 numImportLIDs,
734 "numImportPacketsPerLID",
735 tooBigFactor,
736 needFenceBeforeNextAlloc);
737
738 if (verbose) {
739 std::ostringstream os;
740 os << *prefix << "DualView status after:" << endl
741 << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
742 "numExportPacketsPerLID_")
743 << endl
744 << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
745 "numImportPacketsPerLID_")
746 << endl;
747 std::cerr << os.str ();
749
750 return firstReallocated || secondReallocated;
751 }
752
753 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
754 void
757 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
758 const char modeString[],
759 const ReverseOption revOp,
760 const CombineMode CM,
761 bool restrictedMode)
762 {
763 using Details::Behavior;
767 using Kokkos::Compat::getArrayView;
768 using Kokkos::Compat::getConstArrayView;
769 using Kokkos::Compat::getKokkosViewDeepCopy;
770 using Kokkos::Compat::create_const_view;
771 using std::endl;
774
775 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
776 const char funcNameHost[] = "Tpetra::DistObject::beginTransfer[Host]";
777 const char funcNameDevice[] = "Tpetra::DistObject::beginTransfer[Device]";
778 const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
779
780 ProfilingRegion region_doTransfer(funcName);
781 const bool verbose = Behavior::verbose("DistObject");
782 std::shared_ptr<std::string> prefix;
783 if (verbose) {
784 std::ostringstream os;
785 prefix = this->createPrefix("DistObject", "doTransfer");
786 os << *prefix << "Source type: " << Teuchos::typeName(src)
787 << ", Target type: " << Teuchos::typeName(*this) << endl;
788 std::cerr << os.str();
789 }
790
791 // "Restricted Mode" does two things:
792 // 1) Skips copyAndPermute
793 // 2) Allows the "target" Map of the transfer to be a subset of
794 // the Map of *this, in a "locallyFitted" sense.
795 //
796 // This cannot be used if #2 is not true, OR there are permutes.
797 // Source Maps still need to match
798
799 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
800 // checks. These may communicate more.
801 const bool debug = Behavior::debug("DistObject");
802 if (debug) {
803 if (! restrictedMode && revOp == DoForward) {
804 const bool myMapSameAsTransferTgtMap =
805 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
806 TEUCHOS_TEST_FOR_EXCEPTION
807 (! myMapSameAsTransferTgtMap, std::invalid_argument,
808 "Tpetra::DistObject::" << modeString << ": For forward-mode "
809 "communication, the target DistObject's Map must be the same "
810 "(in the sense of Tpetra::Map::isSameAs) as the input "
811 "Export/Import object's target Map.");
812 }
813 else if (! restrictedMode && revOp == DoReverse) {
814 const bool myMapSameAsTransferSrcMap =
815 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
816 TEUCHOS_TEST_FOR_EXCEPTION
817 (! myMapSameAsTransferSrcMap, std::invalid_argument,
818 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
819 "communication, the target DistObject's Map must be the same "
820 "(in the sense of Tpetra::Map::isSameAs) as the input "
821 "Export/Import object's source Map.");
822 }
823 else if (restrictedMode && revOp == DoForward) {
824 const bool myMapLocallyFittedTransferTgtMap =
825 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
826 TEUCHOS_TEST_FOR_EXCEPTION
827 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
828 "Tpetra::DistObject::" << modeString << ": For forward-mode "
829 "communication using restricted mode, Export/Import object's "
830 "target Map must be locally fitted (in the sense of "
831 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
832 }
833 else { // if (restrictedMode && revOp == DoReverse)
834 const bool myMapLocallyFittedTransferSrcMap =
835 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
836 TEUCHOS_TEST_FOR_EXCEPTION
837 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
838 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
839 "communication using restricted mode, Export/Import object's "
840 "source Map must be locally fitted (in the sense of "
841 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
842 }
843
844 // SrcDistObject need not even _have_ Maps. However, if the
845 // source object is a DistObject, it has a Map, and we may
846 // compare that Map with the Transfer's Maps.
847 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
848 if (srcDistObj != nullptr) {
849 if (revOp == DoForward) {
850 const bool srcMapSameAsImportSrcMap =
851 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
852 TEUCHOS_TEST_FOR_EXCEPTION
853 (! srcMapSameAsImportSrcMap, std::invalid_argument,
854 "Tpetra::DistObject::" << modeString << ": For forward-mode "
855 "communication, the source DistObject's Map must be the same "
856 "as the input Export/Import object's source Map.");
857 }
858 else { // revOp == DoReverse
859 const bool srcMapSameAsImportTgtMap =
860 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
861 TEUCHOS_TEST_FOR_EXCEPTION
862 (! srcMapSameAsImportTgtMap, std::invalid_argument,
863 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
864 "communication, the source DistObject's Map must be the same "
865 "as the input Export/Import object's target Map.");
866 }
867 }
868 }
869
870 const size_t numSameIDs = transfer.getNumSameIDs ();
871 Distributor& distor = transfer.getDistributor ();
872 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
873
874 TEUCHOS_TEST_FOR_EXCEPTION
875 (debug && restrictedMode &&
876 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
877 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
878 std::invalid_argument,
879 "Tpetra::DistObject::" << modeString << ": Transfer object "
880 "cannot have permutes in restricted mode.");
881
882 // Do we need all communication buffers to live on host?
883 if (verbose) {
884 std::ostringstream os;
885 os << *prefix << "doTransfer: Use new interface; "
886 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
887 std::cerr << os.str ();
888 }
889
890 using const_lo_dv_type =
891 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
892 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
893 transfer.getPermuteToLIDs_dv () :
894 transfer.getPermuteFromLIDs_dv ();
895 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
896 transfer.getPermuteFromLIDs_dv () :
897 transfer.getPermuteToLIDs_dv ();
898 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
899 transfer.getRemoteLIDs_dv () :
900 transfer.getExportLIDs_dv ();
901 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
902 transfer.getExportLIDs_dv () :
903 transfer.getRemoteLIDs_dv ();
904 const bool canTryAliasing = (revOp == DoForward) ?
905 transfer.areRemoteLIDsContiguous() :
906 transfer.areExportLIDsContiguous();
907 // const bool canTryAliasing = false;
908
909 ProfilingRegion region_dTN(funcName);
910#ifdef HAVE_TPETRA_TRANSFER_TIMERS
911 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
912 // of Kokkos profiling.
913 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
914#endif // HAVE_TPETRA_TRANSFER_TIMERS
915
916 if (verbose) {
917 std::ostringstream os;
918 os << *prefix << "Input arguments:" << endl
919 << *prefix << " combineMode: " << combineModeToString (CM) << endl
920 << *prefix << " numSameIDs: " << numSameIDs << endl
921 << *prefix << " "
922 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
923 << *prefix << " "
924 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
925 << *prefix << " "
926 << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
927 << *prefix << " "
928 << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
929 << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
930 << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
931 std::cerr << os.str ();
932 }
933
934 {
935 ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
936 if (verbose) {
937 std::ostringstream os;
938 os << *prefix << "1. checkSizes" << endl;
939 std::cerr << os.str ();
940 }
941 const bool checkSizesResult = this->checkSizes (src);
942 TEUCHOS_TEST_FOR_EXCEPTION
943 (! checkSizesResult, std::invalid_argument,
944 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
945 "destination object is not a legal target for redistribution from the "
946 "source object. This probably means that they do not have the same "
947 "dimensions. For example, MultiVectors must have the same number of "
948 "rows and columns.");
949 }
950
951 // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
952 // that if CM == INSERT || CM == REPLACE, the target object could
953 // be write only. We don't optimize for that here.
954
955 if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
956 // There is at least one GID to copy or permute.
957 if (verbose) {
958 std::ostringstream os;
959 os << *prefix << "2. copyAndPermute" << endl;
960 std::cerr << os.str ();
961 }
962 ProfilingRegion region_cp
963 ("Tpetra::DistObject::doTransferNew::copyAndPermute");
964#ifdef HAVE_TPETRA_TRANSFER_TIMERS
965 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
966 // of Kokkos profiling.
967 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
968#endif // HAVE_TPETRA_TRANSFER_TIMERS
969
970 if (numSameIDs + permuteToLIDs.extent (0) != 0) {
971 // There is at least one GID to copy or permute.
972 if (verbose) {
973 std::ostringstream os;
974 os << *prefix << "2. copyAndPermute" << endl;
975 std::cerr << os.str ();
976 }
977 this->copyAndPermute (src, numSameIDs, permuteToLIDs,
978 permuteFromLIDs, CM);
979 if (verbose) {
980 std::ostringstream os;
981 os << *prefix << "After copyAndPermute:" << endl
982 << *prefix << " "
983 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
984 << endl
985 << *prefix << " "
986 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
987 << endl;
988 std::cerr << os.str ();
989 }
990 }
991 }
992
993 // The method may return zero even if the implementation actually
994 // does have a constant number of packets per LID. However, if it
995 // returns nonzero, we may use this information to avoid
996 // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
997 // will set this to its final value.
998 //
999 // We only need this if CM != ZERO, but it has to be lifted out of
1000 // that scope because there are multiple tests for CM != ZERO.
1001 size_t constantNumPackets = this->constantNumberOfPackets ();
1002 if (verbose) {
1003 std::ostringstream os;
1004 os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1005 std::cerr << os.str ();
1006 }
1007
1008 // We only need to pack communication buffers if the combine mode
1009 // is not ZERO. A "ZERO combine mode" means that the results are
1010 // the same as if we had received all zeros, and added them to the
1011 // existing values. That means we don't need to communicate.
1012 if (CM != ZERO) {
1013 if (constantNumPackets == 0) {
1014 if (verbose) {
1015 std::ostringstream os;
1016 os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1017 << endl;
1018 std::cerr << os.str ();
1019 }
1020 // This only reallocates if necessary, that is, if the sizes
1021 // don't match.
1022 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1023 remoteLIDs.extent (0));
1024 }
1025
1026 if (verbose) {
1027 std::ostringstream os;
1028 os << *prefix << "4. packAndPrepare: before, "
1029 << dualViewStatusToString (this->exports_, "exports_")
1030 << endl;
1031 std::cerr << os.str ();
1032 }
1033
1034 doPackAndPrepare(src, exportLIDs, constantNumPackets, execution_space());
1035 if (commOnHost) {
1036 this->exports_.sync_host();
1037 }
1038 else {
1039 this->exports_.sync_device();
1040 }
1041
1042 if (verbose) {
1043 std::ostringstream os;
1044 os << *prefix << "5.1. After packAndPrepare, "
1045 << dualViewStatusToString (this->exports_, "exports_")
1046 << endl;
1047 std::cerr << os.str ();
1048 }
1049 } // if (CM != ZERO)
1050
1051 // We only need to send data if the combine mode is not ZERO.
1052 if (CM != ZERO) {
1053 if (constantNumPackets != 0) {
1054 // There are a constant number of packets per element. We
1055 // already know (from the number of "remote" (incoming)
1056 // elements) how many incoming elements we expect, so we can
1057 // resize the buffer accordingly.
1058 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1059 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1060 }
1061
1062 // Do we need to do communication (via doPostsAndWaits)?
1063 bool needCommunication = true;
1064
1065 // This may be NULL. It will be used below.
1066 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1067
1068 if (revOp == DoReverse && ! this->isDistributed ()) {
1069 needCommunication = false;
1070 }
1071 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1072 // is distributed requires a cast to DistObject. If it's not a
1073 // DistObject, then I'm not quite sure what to do. Perhaps it
1074 // would be more appropriate for SrcDistObject to have an
1075 // isDistributed() method. For now, I'll just assume that we
1076 // need to do communication unless the cast succeeds and the
1077 // source is not distributed.
1078 else if (revOp == DoForward && srcDistObj != NULL &&
1079 ! srcDistObj->isDistributed ()) {
1080 needCommunication = false;
1081 }
1082
1083 if (! needCommunication) {
1084 if (verbose) {
1085 std::ostringstream os;
1086 os << *prefix << "Comm not needed; skipping" << endl;
1087 std::cerr << os.str ();
1088 }
1089 }
1090 else {
1091 ProfilingRegion region_dpw
1092 ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1093#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1094 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1095 // favor of Kokkos profiling.
1096 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1097#endif // HAVE_TPETRA_TRANSFER_TIMERS
1098
1099 if (verbose) {
1100 std::ostringstream os;
1101 os << *prefix << "7.0. "
1102 << (revOp == DoReverse ? "Reverse" : "Forward")
1103 << " mode" << endl;
1104 std::cerr << os.str ();
1105 }
1106
1107 doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1108 } // if (needCommunication)
1109 } // if (CM != ZERO)
1110 }
1111
1112 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1113 void
1115 endTransfer(const SrcDistObject& src,
1116 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1117 const char modeString[],
1118 const ReverseOption revOp,
1119 const CombineMode CM,
1120 bool restrictedMode)
1121 {
1122 using Details::Behavior;
1126 using Kokkos::Compat::getArrayView;
1127 using Kokkos::Compat::getConstArrayView;
1128 using Kokkos::Compat::getKokkosViewDeepCopy;
1129 using Kokkos::Compat::create_const_view;
1130 using std::endl;
1133
1134 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1135 const char funcNameHost[] = "Tpetra::DistObject::endTransfer[Host]";
1136 const char funcNameDevice[] = "Tpetra::DistObject::endTransfer[Device]";
1137 const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1138 ProfilingRegion region_doTransfer(funcName);
1139 const bool verbose = Behavior::verbose("DistObject");
1140 std::shared_ptr<std::string> prefix;
1141 if (verbose) {
1142 std::ostringstream os;
1143 prefix = this->createPrefix("DistObject", "doTransfer");
1144 os << *prefix << "Source type: " << Teuchos::typeName(src)
1145 << ", Target type: " << Teuchos::typeName(*this) << endl;
1146 std::cerr << os.str();
1147 }
1148
1149 // "Restricted Mode" does two things:
1150 // 1) Skips copyAndPermute
1151 // 2) Allows the "target" Map of the transfer to be a subset of
1152 // the Map of *this, in a "locallyFitted" sense.
1153 //
1154 // This cannot be used if #2 is not true, OR there are permutes.
1155 // Source Maps still need to match
1156
1157 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1158 // checks. These may communicate more.
1159 const bool debug = Behavior::debug("DistObject");
1160 if (debug) {
1161 if (! restrictedMode && revOp == DoForward) {
1162 const bool myMapSameAsTransferTgtMap =
1163 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1164 TEUCHOS_TEST_FOR_EXCEPTION
1165 (! myMapSameAsTransferTgtMap, std::invalid_argument,
1166 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1167 "communication, the target DistObject's Map must be the same "
1168 "(in the sense of Tpetra::Map::isSameAs) as the input "
1169 "Export/Import object's target Map.");
1170 }
1171 else if (! restrictedMode && revOp == DoReverse) {
1172 const bool myMapSameAsTransferSrcMap =
1173 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1174 TEUCHOS_TEST_FOR_EXCEPTION
1175 (! myMapSameAsTransferSrcMap, std::invalid_argument,
1176 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1177 "communication, the target DistObject's Map must be the same "
1178 "(in the sense of Tpetra::Map::isSameAs) as the input "
1179 "Export/Import object's source Map.");
1180 }
1181 else if (restrictedMode && revOp == DoForward) {
1182 const bool myMapLocallyFittedTransferTgtMap =
1183 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1184 TEUCHOS_TEST_FOR_EXCEPTION
1185 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1186 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1187 "communication using restricted mode, Export/Import object's "
1188 "target Map must be locally fitted (in the sense of "
1189 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1190 }
1191 else { // if (restrictedMode && revOp == DoReverse)
1192 const bool myMapLocallyFittedTransferSrcMap =
1193 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1194 TEUCHOS_TEST_FOR_EXCEPTION
1195 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1196 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1197 "communication using restricted mode, Export/Import object's "
1198 "source Map must be locally fitted (in the sense of "
1199 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1200 }
1201
1202 // SrcDistObject need not even _have_ Maps. However, if the
1203 // source object is a DistObject, it has a Map, and we may
1204 // compare that Map with the Transfer's Maps.
1205 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1206 if (srcDistObj != nullptr) {
1207 if (revOp == DoForward) {
1208 const bool srcMapSameAsImportSrcMap =
1209 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1210 TEUCHOS_TEST_FOR_EXCEPTION
1211 (! srcMapSameAsImportSrcMap, std::invalid_argument,
1212 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1213 "communication, the source DistObject's Map must be the same "
1214 "as the input Export/Import object's source Map.");
1215 }
1216 else { // revOp == DoReverse
1217 const bool srcMapSameAsImportTgtMap =
1218 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1219 TEUCHOS_TEST_FOR_EXCEPTION
1220 (! srcMapSameAsImportTgtMap, std::invalid_argument,
1221 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1222 "communication, the source DistObject's Map must be the same "
1223 "as the input Export/Import object's target Map.");
1224 }
1225 }
1226 }
1227
1228 Distributor& distor = transfer.getDistributor ();
1229 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1230
1231 TEUCHOS_TEST_FOR_EXCEPTION
1232 (debug && restrictedMode &&
1233 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1234 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1235 std::invalid_argument,
1236 "Tpetra::DistObject::" << modeString << ": Transfer object "
1237 "cannot have permutes in restricted mode.");
1238
1239 // Do we need all communication buffers to live on host?
1240 if (verbose) {
1241 std::ostringstream os;
1242 os << *prefix << "doTransfer: Use new interface; "
1243 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1244 std::cerr << os.str ();
1245 }
1246
1247 using const_lo_dv_type =
1248 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1249 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1250 transfer.getPermuteToLIDs_dv () :
1251 transfer.getPermuteFromLIDs_dv ();
1252 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1253 transfer.getPermuteFromLIDs_dv () :
1254 transfer.getPermuteToLIDs_dv ();
1255 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1256 transfer.getRemoteLIDs_dv () :
1257 transfer.getExportLIDs_dv ();
1258 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1259 transfer.getExportLIDs_dv () :
1260 transfer.getRemoteLIDs_dv ();
1261 const bool canTryAliasing = (revOp == DoForward) ?
1262 transfer.areRemoteLIDsContiguous() :
1263 transfer.areExportLIDsContiguous();
1264
1265 size_t constantNumPackets = this->constantNumberOfPackets ();
1266
1267 // We only need to send data if the combine mode is not ZERO.
1268 if (CM != ZERO) {
1269 if (constantNumPackets != 0) {
1270 // There are a constant number of packets per element. We
1271 // already know (from the number of "remote" (incoming)
1272 // elements) how many incoming elements we expect, so we can
1273 // resize the buffer accordingly.
1274 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1275 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1276 }
1277
1278 // Do we need to do communication (via doPostsAndWaits)?
1279 bool needCommunication = true;
1280
1281 // This may be NULL. It will be used below.
1282 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1283
1284 if (revOp == DoReverse && ! this->isDistributed ()) {
1285 needCommunication = false;
1286 }
1287 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1288 // is distributed requires a cast to DistObject. If it's not a
1289 // DistObject, then I'm not quite sure what to do. Perhaps it
1290 // would be more appropriate for SrcDistObject to have an
1291 // isDistributed() method. For now, I'll just assume that we
1292 // need to do communication unless the cast succeeds and the
1293 // source is not distributed.
1294 else if (revOp == DoForward && srcDistObj != NULL &&
1295 ! srcDistObj->isDistributed ()) {
1296 needCommunication = false;
1297 }
1298
1299 if (! needCommunication) {
1300 if (verbose) {
1301 std::ostringstream os;
1302 os << *prefix << "Comm not needed; skipping" << endl;
1303 std::cerr << os.str ();
1304 }
1305 }
1306 else {
1307 distributorActor_.doWaits(distributorPlan);
1308
1309 if (verbose) {
1310 std::ostringstream os;
1311 os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1312 std::cerr << os.str ();
1313 }
1314 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM, execution_space());
1315 } // if (needCommunication)
1316 } // if (CM != ZERO)
1317
1318 if (verbose) {
1319 std::ostringstream os;
1320 os << *prefix << "9. Done!" << endl;
1321 std::cerr << os.str ();
1322 }
1323
1324 if (verbose) {
1325 std::ostringstream os;
1326 os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1327 std::cerr << os.str ();
1328 }
1329 }
1330
1331 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1332 void
1334 doPosts(const Details::DistributorPlan& distributorPlan,
1335 size_t constantNumPackets,
1336 bool commOnHost,
1337 std::shared_ptr<std::string> prefix,
1338 const bool canTryAliasing,
1339 const CombineMode CM)
1340 {
1343 using Kokkos::Compat::create_const_view;
1344 using std::endl;
1345
1346 const bool verbose = Details::Behavior::verbose("DistObject");
1347
1348 if (constantNumPackets == 0) { // variable num packets per LID
1349 if (verbose) {
1350 std::ostringstream os;
1351 os << *prefix << "7.1. Variable # packets / LID: first comm "
1352 << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1353 << endl;
1354 std::cerr << os.str ();
1355 }
1356 size_t totalImportPackets = 0;
1357 if (commOnHost) {
1358 if (this->numExportPacketsPerLID_.need_sync_host ()) {
1359 this->numExportPacketsPerLID_.sync_host ();
1360 }
1361 if (this->numImportPacketsPerLID_.need_sync_host ()) {
1362 this->numImportPacketsPerLID_.sync_host ();
1363 }
1364 this->numImportPacketsPerLID_.modify_host (); // out arg
1365 auto numExp_h =
1366 create_const_view (this->numExportPacketsPerLID_.view_host ());
1367 auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1368
1369 // MPI communication happens here.
1370 if (verbose) {
1371 std::ostringstream os;
1372 os << *prefix << "Call doPostsAndWaits"
1373 << endl;
1374 std::cerr << os.str ();
1375 }
1376 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1377
1378 if (verbose) {
1379 std::ostringstream os;
1380 os << *prefix << "Count totalImportPackets" << std::endl;
1381 std::cerr << os.str ();
1382 }
1383 using the_dev_type = typename decltype (numImp_h)::device_type;
1384 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1385 }
1386 else { // ! commOnHost
1387 this->numExportPacketsPerLID_.sync_device ();
1388 this->numImportPacketsPerLID_.sync_device ();
1389 this->numImportPacketsPerLID_.modify_device (); // out arg
1390 auto numExp_d = create_const_view
1391 (this->numExportPacketsPerLID_.view_device ());
1392 auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1393
1394 // MPI communication happens here.
1395 if (verbose) {
1396 std::ostringstream os;
1397 os << *prefix << "Call doPostsAndWaits"
1398 << endl;
1399 std::cerr << os.str ();
1400 }
1401
1402 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1403
1404 if (verbose) {
1405 std::ostringstream os;
1406 os << *prefix << "Count totalImportPackets" << std::endl;
1407 std::cerr << os.str ();
1408 }
1409 using the_dev_type = typename decltype (numImp_d)::device_type;
1410 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1411 }
1412
1413 if (verbose) {
1414 std::ostringstream os;
1415 os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1416 std::cerr << os.str ();
1417 }
1418 this->reallocImportsIfNeeded (totalImportPackets, verbose,
1419 prefix.get (), canTryAliasing, CM);
1420 if (verbose) {
1421 std::ostringstream os;
1422 os << *prefix << "7.3. Second comm" << std::endl;
1423 std::cerr << os.str ();
1424 }
1425
1426 // mfh 04 Feb 2019: Distributor expects the "num packets per
1427 // LID" arrays on host, so that it can issue MPI sends and
1428 // receives correctly.
1429 this->numExportPacketsPerLID_.sync_host ();
1430 this->numImportPacketsPerLID_.sync_host ();
1431
1432 // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1433 // doReversePostsAndWaits currently want
1434 // numExportPacketsPerLID and numImportPacketsPerLID as
1435 // Teuchos::ArrayView, rather than as Kokkos::View.
1436 //
1437 // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1438 // device. The above syncs might.
1439 auto numExportPacketsPerLID_av =
1440 getArrayViewFromDualView (this->numExportPacketsPerLID_);
1441 auto numImportPacketsPerLID_av =
1442 getArrayViewFromDualView (this->numImportPacketsPerLID_);
1443
1444 // imports_ is for output only, so we don't need to sync it
1445 // before marking it as modified. However, in order to
1446 // prevent spurious debug-mode errors (e.g., "modified on
1447 // both device and host"), we first need to clear its
1448 // "modified" flags.
1449 this->imports_.clear_sync_state ();
1450
1451 if (verbose) {
1452 std::ostringstream os;
1453 os << *prefix << "Comm on "
1454 << (commOnHost ? "host" : "device")
1455 << "; call doPosts" << endl;
1456 std::cerr << os.str ();
1457 }
1458
1459 if (commOnHost) {
1460 this->imports_.modify_host ();
1461 distributorActor_.doPosts
1462 (distributorPlan,
1463 create_const_view (this->exports_.view_host ()),
1464 numExportPacketsPerLID_av,
1465 this->imports_.view_host (),
1466 numImportPacketsPerLID_av);
1467 }
1468 else { // pack on device
1469 Kokkos::fence("DistObject::doPosts-1"); // for UVM
1470 this->imports_.modify_device ();
1471 distributorActor_.doPosts
1472 (distributorPlan,
1473 create_const_view (this->exports_.view_device ()),
1474 numExportPacketsPerLID_av,
1475 this->imports_.view_device (),
1476 numImportPacketsPerLID_av);
1477 }
1478 }
1479 else { // constant number of packets per LID
1480 if (verbose) {
1481 std::ostringstream os;
1482 os << *prefix << "7.1. Const # packets per LID: " << endl
1483 << *prefix << " "
1484 << dualViewStatusToString (this->exports_, "exports_")
1485 << endl
1486 << *prefix << " "
1487 << dualViewStatusToString (this->exports_, "imports_")
1488 << endl;
1489 std::cerr << os.str ();
1490 }
1491 // imports_ is for output only, so we don't need to sync it
1492 // before marking it as modified. However, in order to
1493 // prevent spurious debug-mode errors (e.g., "modified on
1494 // both device and host"), we first need to clear its
1495 // "modified" flags.
1496 this->imports_.clear_sync_state ();
1497
1498 if (verbose) {
1499 std::ostringstream os;
1500 os << *prefix << "7.2. Comm on "
1501 << (commOnHost ? "host" : "device")
1502 << "; call doPosts" << endl;
1503 std::cerr << os.str ();
1504 }
1505 if (commOnHost) {
1506 this->imports_.modify_host ();
1507 distributorActor_.doPosts
1508 (distributorPlan,
1509 create_const_view (this->exports_.view_host ()),
1510 constantNumPackets,
1511 this->imports_.view_host ());
1512 }
1513 else { // pack on device
1514 Kokkos::fence("DistObject::doPosts-2"); // for UVM
1515 this->imports_.modify_device ();
1516 distributorActor_.doPosts
1517 (distributorPlan,
1518 create_const_view (this->exports_.view_device ()),
1519 constantNumPackets,
1520 this->imports_.view_device ());
1521 } // commOnHost
1522 } // constant or variable num packets per LID
1523 }
1524
1525 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1526 void
1529 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1530 size_t& constantNumPackets,
1531 const execution_space &space)
1532 {
1534 using std::endl;
1535 const bool debug = Details::Behavior::debug("DistObject");
1536
1537 ProfilingRegion region_pp
1538 ("Tpetra::DistObject::doPackAndPrepare");
1539#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1540 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1541 // favor of Kokkos profiling.
1542 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1543#endif // HAVE_TPETRA_TRANSFER_TIMERS
1544
1545 // Ask the source to pack data. Also ask it whether there are
1546 // a constant number of packets per element
1547 // (constantNumPackets is an output argument). If there are,
1548 // constantNumPackets will come back nonzero. Otherwise, the
1549 // source will fill the numExportPacketsPerLID_ array.
1550
1551 // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1552 // Alternately, make packAndPrepare take a "commOnHost"
1553 // argument to tell it where to leave the data?
1554 //
1555 // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1556 // the freedom to pack and unpack either on host or device.
1557 // We should prefer sync'ing only on demand. Thus, we can
1558 // answer the above question: packAndPrepare should not
1559 // take a commOnHost argument, and doTransferNew should sync
1560 // where needed, if needed.
1561 if (debug) {
1562 std::ostringstream lclErrStrm;
1563 bool lclSuccess = false;
1564 try {
1565 this->packAndPrepare (src, exportLIDs, this->exports_,
1566 this->numExportPacketsPerLID_,
1567 constantNumPackets, space);
1568 lclSuccess = true;
1569 }
1570 catch (std::exception& e) {
1571 lclErrStrm << "packAndPrepare threw an exception: "
1572 << endl << e.what();
1573 }
1574 catch (...) {
1575 lclErrStrm << "packAndPrepare threw an exception "
1576 "not a subclass of std::exception.";
1577 }
1578 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1579 "threw an exception in packAndPrepare on "
1580 "one or more processes in the DistObject's communicator.";
1581 auto comm = getMap()->getComm();
1582 Details::checkGlobalError(std::cerr, lclSuccess,
1583 lclErrStrm.str().c_str(),
1584 gblErrMsgHeader, *comm);
1585 }
1586 else {
1587 this->packAndPrepare (src, exportLIDs, this->exports_,
1588 this->numExportPacketsPerLID_,
1589 constantNumPackets, space);
1590 }
1591 }
1592
1593 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1594 void
1596 doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1597 size_t constantNumPackets,
1598 CombineMode CM,
1599 const execution_space &space)
1600 {
1602 using std::endl;
1603 const bool debug = Details::Behavior::debug("DistObject");
1604
1605 ProfilingRegion region_uc
1606 ("Tpetra::DistObject::doUnpackAndCombine");
1607#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1608 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1609 // favor of Kokkos profiling.
1610 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1611#endif // HAVE_TPETRA_TRANSFER_TIMERS
1612
1613 if (debug) {
1614 std::ostringstream lclErrStrm;
1615 bool lclSuccess = false;
1616 try {
1617 this->unpackAndCombine (remoteLIDs, this->imports_,
1618 this->numImportPacketsPerLID_,
1619 constantNumPackets, CM, space);
1620 lclSuccess = true;
1621 }
1622 catch (std::exception& e) {
1623 lclErrStrm << "doUnpackAndCombine threw an exception: "
1624 << endl << e.what();
1625 }
1626 catch (...) {
1627 lclErrStrm << "doUnpackAndCombine threw an exception "
1628 "not a subclass of std::exception.";
1629 }
1630 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1631 "threw an exception in unpackAndCombine on "
1632 "one or more processes in the DistObject's communicator.";
1633 auto comm = getMap()->getComm();
1634 Details::checkGlobalError(std::cerr, lclSuccess,
1635 lclErrStrm.str().c_str(),
1636 gblErrMsgHeader, *comm);
1637 }
1638 else {
1639 this->unpackAndCombine (remoteLIDs, this->imports_,
1640 this->numImportPacketsPerLID_,
1641 constantNumPackets, CM, space);
1642 }
1643 }
1644
1645 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1646 void
1649 (const SrcDistObject&,
1650 const size_t,
1651 const Kokkos::DualView<
1652 const local_ordinal_type*,
1654 const Kokkos::DualView<
1655 const local_ordinal_type*,
1657 const CombineMode CM)
1658 {}
1659
1660// clang-format on
1661template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1663 const SrcDistObject &source, const size_t numSameIDs,
1664 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1665 &permuteToLIDs,
1666 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1667 &permuteFromLIDs,
1668 const CombineMode CM, const execution_space &space) {
1669 /*
1670 This is called if the derived class doesn't know how to pack and prepare in
1671 an arbitrary execution space instance, but it was asked to anyway.
1672 Provide a safe illusion by actually doing the work in the default instance,
1673 and syncing the default instance with the provided instance.
1674 The caller expects
1675 1. any work in the provided instance to complete before this.
1676 2. This to complete before any following work in the provided instance.
1677 */
1678
1679 space.fence(); // // TODO: Tpetra::Details::Spaces::exec_space_wait
1680 copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
1681 CM); // default instance
1682 execution_space().fence(); // TODO:
1683 // Tpetra::Details::Spaces::exec_space_wait
1684}
1685// clang-format off
1686
1687
1688 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1689 void
1692 (const SrcDistObject&,
1693 const Kokkos::DualView<
1694 const local_ordinal_type*,
1696 Kokkos::DualView<
1697 packet_type*,
1699 Kokkos::DualView<
1700 size_t*,
1702 size_t&)
1703 {}
1704
1705// clang-format on
1706template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1708 const SrcDistObject &source,
1709 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1710 &exportLIDs,
1711 Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1712 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1713 size_t &constantNumPackets, const execution_space &space) {
1714 /*
1715 This is called if the derived class doesn't know how to pack and prepare in
1716 an arbitrary execution space instance, but it was asked to anyway.
1717 Provide a safe illusion by actually doing the work in the default instance,
1718 and syncing the default instance with the provided instance.
1719
1720 The caller expects
1721 1. any work in the provided instance to complete before this.
1722 2. This to complete before any following work in the provided instance.
1723 */
1724
1725 // wait for any work from prior operations in the provided instance to
1726 // complete
1727 space.fence(); // TODO: Details::Spaces::exec_space_wait
1728
1729 // pack and prepare in the default instance.
1730 packAndPrepare(source, exportLIDs, exports, numPacketsPerLID,
1731 constantNumPackets); // default instance
1732
1733 // wait for the default instance to complete before returning, so any
1734 // following work inserted into the provided instance will be done after this
1735 execution_space().fence(); // TODO: Details::Spaces::exec_space_wait
1736}
1737// clang-format off
1738
1739 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1740 void
1743 (const Kokkos::DualView<
1744 const local_ordinal_type*,
1745 buffer_device_type>& /* importLIDs */,
1746 Kokkos::DualView<
1747 packet_type*,
1748 buffer_device_type> /* imports */,
1749 Kokkos::DualView<
1750 size_t*,
1751 buffer_device_type> /* numPacketsPerLID */,
1752 const size_t /* constantNumPackets */,
1753 const CombineMode /* combineMode */)
1754 {}
1755
1756// clang-format on
1757template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1759 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1760 &importLIDs,
1761 Kokkos::DualView<packet_type *, buffer_device_type> imports,
1762 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1763 const size_t constantNumPackets, const CombineMode combineMode,
1764 const execution_space &space) {
1765 // Wait for any work in the provided space to complete
1766 space.fence(); // TODO: Details::Spaces::exec_space_wait(execution_space(),
1767 // space);
1768 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
1769 combineMode); // default instance
1770 // wait for unpack to finish in the default instance, since the caller
1771 // may be expecting sequential semantics in the `space` instance
1772 execution_space().fence(); // TODO: Details::Spaces::exec_space_wait(space,
1773 // execution_space());
1774}
1775// clang-format off
1776
1777template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1779 std::ostream &os) const {
1780 using std::endl;
1781 using Teuchos::FancyOStream;
1782 using Teuchos::getFancyOStream;
1783 using Teuchos::RCP;
1784 using Teuchos::rcpFromRef;
1785
1786 RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1787 this->describe(*out, Teuchos::VERB_DEFAULT);
1788}
1789
1790template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1791std::unique_ptr<std::string>
1792DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::createPrefix(
1793 const char className[], const char methodName[]) const {
1794 auto map = this->getMap();
1795 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1796 return Details::createPrefix(comm.getRawPtr(), className, methodName);
1797}
1798
1799template <class DistObjectType>
1801 Teuchos::RCP<DistObjectType> &input,
1802 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
1803 typename DistObjectType::global_ordinal_type,
1804 typename DistObjectType::node_type>> &newMap) {
1805 input->removeEmptyProcessesInPlace(newMap);
1806 if (newMap.is_null()) { // my process is excluded
1807 input = Teuchos::null;
1808 }
1809}
1810
1811template <class DistObjectType>
1812void removeEmptyProcessesInPlace(Teuchos::RCP<DistObjectType> &input) {
1813 auto newMap = input->getMap()->removeEmptyProcesses();
1815}
1816
// Explicit instantiation macro for general DistObject.
// Expands to an explicit instantiation of
// DistObject<SCALAR, LO, GO, NODE>.
#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) template class DistObject<SCALAR, LO, GO, NODE>;

// Explicit instantiation macro for DistObject<char, ...>.
// The "SLGN" stuff above doesn't work for Packet=char.
#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) template class DistObject<char, LO, GO, NODE>;
1825
1826} // namespace Tpetra
1827
1828#endif // TPETRA_DISTOBJECT_DEF_HPP
1829// clang-format on
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
Base class for distributed Tpetra objects that support data redistribution.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
Kokkos::DualView< packet_type *, buffer_device_type > exports_
Buffer from which packed data are exported (sent to other processes).
Kokkos::DualView< packet_type *, buffer_device_type > imports_
Buffer into which packed data are imported (received from other processes).
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine...
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
LocalOrdinal local_ordinal_type
The type of local indices.
typename ::Kokkos::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
typename device_type::execution_space execution_space
The Kokkos execution space.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual std::string description() const
One-line description of this object.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
bool isDistributed() const
Whether this is a globally distributed object.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
CombineMode
Rule for combining data in an Import or Export.
@ ZERO
Replace old values with zero.