Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_CRSGRAPH_DEF_HPP
11#define TPETRA_CRSGRAPH_DEF_HPP
12
15
20#include "Tpetra_Details_getGraphDiagOffsets.hpp"
21#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
22#include "Tpetra_Details_makeColMap.hpp"
26#include "Tpetra_Distributor.hpp"
27#include "Teuchos_SerialDenseMatrix.hpp"
28#include "Tpetra_Vector.hpp"
31#include "Tpetra_Details_packCrsGraph.hpp"
32#include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
33#include "Tpetra_Details_CrsPadding.hpp"
34#include "Tpetra_Util.hpp"
35#include <algorithm>
36#include <limits>
37#include <map>
38#include <sstream>
39#include <string>
40#include <type_traits>
41#include <utility>
42#include <vector>
43
44namespace Tpetra {
45 namespace Details {
46 namespace Impl {
47
48 template<class MapIter>
49 void
50 verbosePrintMap(std::ostream& out,
51 MapIter beg,
52 MapIter end,
53 const size_t numEnt,
54 const char mapName[])
55 {
56 using ::Tpetra::Details::Behavior;
58
59 out << mapName << ": {";
60 const size_t maxNumToPrint =
62 if (maxNumToPrint == 0) {
63 if (numEnt != 0) {
64 out << "...";
65 }
66 }
67 else {
68 const size_t numToPrint = numEnt > maxNumToPrint ?
69 maxNumToPrint : numEnt;
70 size_t count = 0;
71 for (MapIter it = beg; it != end; ++it) {
72 out << "(" << (*it).first << ", ";
73 verbosePrintArray(out, (*it).second, "gblColInds",
74 maxNumToPrint);
75 out << ")";
76 if (count + size_t(1) < numToPrint) {
77 out << ", ";
78 }
79 ++count;
80 }
81 if (count < numEnt) {
82 out << ", ...";
83 }
84 }
85 out << "}";
86 }
87
88 template<class LO, class GO, class Node>
89 Teuchos::ArrayView<GO>
90 getRowGraphGlobalRow(
91 std::vector<GO>& gblColIndsStorage,
92 const RowGraph<LO, GO, Node>& graph,
93 const GO gblRowInd)
94 {
95 size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
96 if (gblColIndsStorage.size() < origNumEnt) {
97 gblColIndsStorage.resize(origNumEnt);
98 }
99 typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
100 origNumEnt);
101 graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
102 Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
103 return retval;
104 }
105
106 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
107 class ConvertColumnIndicesFromGlobalToLocal {
108 public:
109 ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
110 const ::Kokkos::View<const GO*, DT>& gblColInds,
111 const ::Kokkos::View<const OffsetType*, DT>& ptr,
112 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
113 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
114 lclColInds_ (lclColInds),
115 gblColInds_ (gblColInds),
116 ptr_ (ptr),
117 lclColMap_ (lclColMap),
118 numRowEnt_ (numRowEnt)
119 {}
120
121 KOKKOS_FUNCTION void
122 operator () (const LO& lclRow, OffsetType& curNumBad) const
123 {
124 const OffsetType offset = ptr_(lclRow);
125 // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
126 // of entries in a row to LO, as long as the row doesn't have
127 // too many duplicate entries.
128 const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
129 for (LO j = 0; j < numEnt; ++j) {
130 const GO gid = gblColInds_(offset + j);
131 const LO lid = lclColMap_.getLocalElement (gid);
132 lclColInds_(offset + j) = lid;
133 if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
134 ++curNumBad;
135 }
136 }
137 }
138
139 static OffsetType
140 run (const ::Kokkos::View<LO*, DT>& lclColInds,
141 const ::Kokkos::View<const GO*, DT>& gblColInds,
142 const ::Kokkos::View<const OffsetType*, DT>& ptr,
143 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
144 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
145 {
146 typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
147 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
148
149 const LO lclNumRows = ptr.extent (0) == 0 ?
150 static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
151 OffsetType numBad = 0;
152 // Count of "bad" column indices is a reduction over rows.
153 ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
154 functor_type (lclColInds, gblColInds, ptr,
155 lclColMap, numRowEnt),
156 numBad);
157 return numBad;
158 }
159
160 private:
161 ::Kokkos::View<LO*, DT> lclColInds_;
162 ::Kokkos::View<const GO*, DT> gblColInds_;
163 ::Kokkos::View<const OffsetType*, DT> ptr_;
164 ::Tpetra::Details::LocalMap<LO, GO, DT> lclColMap_;
165 ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
166 };
167
168 } // namespace Impl
169
184 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
185 OffsetType
186 convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
187 const Kokkos::View<const GO*, DT>& gblColInds,
188 const Kokkos::View<const OffsetType*, DT>& ptr,
189 const LocalMap<LO, GO, DT>& lclColMap,
190 const Kokkos::View<const NumEntType*, DT>& numRowEnt)
191 {
192 using Impl::ConvertColumnIndicesFromGlobalToLocal;
193 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
194 return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
195 }
196
197 template<class ViewType, class LO>
198 class MaxDifference {
199 public:
200 MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
201
202 KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
203 dst = 0;
204 }
205
206 KOKKOS_INLINE_FUNCTION void
207 join (LO& dst, const LO& src) const
208 {
209 dst = (src > dst) ? src : dst;
210 }
211
212 KOKKOS_INLINE_FUNCTION void
213 operator () (const LO lclRow, LO& maxNumEnt) const
214 {
215 const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
216 maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
217 }
218 private:
219 typename ViewType::const_type ptr_;
220 };
221
222 template<class ViewType, class LO>
223 typename ViewType::non_const_value_type
224 maxDifference (const char kernelLabel[],
225 const ViewType& ptr,
226 const LO lclNumRows)
227 {
228 if (lclNumRows == 0) {
229 // mfh 07 May 2018: Weirdly, I need this special case,
230 // otherwise I get the wrong answer.
231 return static_cast<LO> (0);
232 }
233 else {
234 using execution_space = typename ViewType::execution_space;
235 using range_type = Kokkos::RangePolicy<execution_space, LO>;
236 LO theMaxNumEnt {0};
237 Kokkos::parallel_reduce (kernelLabel,
238 range_type (0, lclNumRows),
239 MaxDifference<ViewType, LO> (ptr),
240 theMaxNumEnt);
241 return theMaxNumEnt;
242 }
243 }
244
245 } // namespace Details
246
247 template <class LocalOrdinal, class GlobalOrdinal, class Node>
248 bool
250 getDebug() {
251 return Details::Behavior::debug("CrsGraph");
252 }
253
254 template <class LocalOrdinal, class GlobalOrdinal, class Node>
255 bool
257 getVerbose() {
258 return Details::Behavior::verbose("CrsGraph");
259 }
260
261 template <class LocalOrdinal, class GlobalOrdinal, class Node>
262 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
263 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
264 const size_t maxNumEntriesPerRow,
265 const Teuchos::RCP<Teuchos::ParameterList>& params) :
266 dist_object_type (rowMap)
267 , rowMap_ (rowMap)
268 , numAllocForAllRows_ (maxNumEntriesPerRow)
269 {
270 const char tfecfFuncName[] =
271 "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
272 staticAssertions ();
273 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
274 (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
275 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
276 "a valid size_t value, which in this case means it must not be "
277 "Teuchos::OrdinalTraits<size_t>::invalid().");
278 resumeFill (params);
280 }
281
282 template <class LocalOrdinal, class GlobalOrdinal, class Node>
283 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
284 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
285 const Teuchos::RCP<const map_type>& colMap,
286 const size_t maxNumEntriesPerRow,
287 const Teuchos::RCP<Teuchos::ParameterList>& params) :
288 dist_object_type (rowMap)
289 , rowMap_ (rowMap)
290 , colMap_ (colMap)
291 , numAllocForAllRows_ (maxNumEntriesPerRow)
292 {
293 const char tfecfFuncName[] =
294 "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
295 staticAssertions ();
296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
297 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
298 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
299 "a valid size_t value, which in this case means it must not be "
300 "Teuchos::OrdinalTraits<size_t>::invalid().");
301 resumeFill (params);
303 }
304
305
306 template <class LocalOrdinal, class GlobalOrdinal, class Node>
307 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
308 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
309 const Teuchos::ArrayView<const size_t>& numEntPerRow,
310 const Teuchos::RCP<Teuchos::ParameterList>& params) :
311 dist_object_type (rowMap)
312 , rowMap_ (rowMap)
314 {
315 const char tfecfFuncName[] =
316 "CrsGraph(rowMap,numEntPerRow,params): ";
317 staticAssertions ();
318
319 const size_t lclNumRows = rowMap.is_null () ?
320 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
322 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
323 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
324 << " != the local number of rows " << lclNumRows << " as specified by "
325 "the input row Map.");
326
327 if (debug_) {
328 for (size_t r = 0; r < lclNumRows; ++r) {
329 const size_t curRowCount = numEntPerRow[r];
330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
331 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
332 std::invalid_argument, "numEntPerRow(" << r << ") "
333 "specifies an invalid number of entries "
334 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
335 }
336 }
337
338 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
339 // The latter is a const View, so we have to copy into a nonconst
340 // View first, then assign.
341 typedef decltype (k_numAllocPerRow_) out_view_type;
342 typedef typename out_view_type::non_const_type nc_view_type;
343 typedef Kokkos::View<const size_t*,
344 typename nc_view_type::array_layout,
345 Kokkos::HostSpace,
346 Kokkos::MemoryUnmanaged> in_view_type;
347 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
348 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
349 lclNumRows);
350 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
351 using exec_space = typename nc_view_type::execution_space;
352 Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
353 k_numAllocPerRow_ = numAllocPerRowOut;
354
355 resumeFill (params);
357 }
358
359
360
361 template <class LocalOrdinal, class GlobalOrdinal, class Node>
362 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
363 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
364 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
365 const Teuchos::RCP<Teuchos::ParameterList>& params) :
366 dist_object_type (rowMap)
367 , rowMap_ (rowMap)
368 , k_numAllocPerRow_ (numEntPerRow.view_host())
370 {
371 const char tfecfFuncName[] =
372 "CrsGraph(rowMap,numEntPerRow,params): ";
373 staticAssertions ();
374
375 const size_t lclNumRows = rowMap.is_null () ?
376 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
377 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
378 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
379 std::invalid_argument, "numEntPerRow has length " <<
380 numEntPerRow.extent (0) << " != the local number of rows " <<
381 lclNumRows << " as specified by " "the input row Map.");
382
383 if (debug_) {
384 for (size_t r = 0; r < lclNumRows; ++r) {
385 const size_t curRowCount = numEntPerRow.view_host()(r);
386 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
387 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
388 std::invalid_argument, "numEntPerRow(" << r << ") "
389 "specifies an invalid number of entries "
390 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
391 }
392 }
393
394 resumeFill (params);
396 }
397
398
399 template <class LocalOrdinal, class GlobalOrdinal, class Node>
400 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
401 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
402 const Teuchos::RCP<const map_type>& colMap,
403 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
404 const Teuchos::RCP<Teuchos::ParameterList>& params) :
405 dist_object_type (rowMap)
406 , rowMap_ (rowMap)
407 , colMap_ (colMap)
408 , k_numAllocPerRow_ (numEntPerRow.view_host())
410 {
411 const char tfecfFuncName[] =
412 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
413 staticAssertions ();
414
415 const size_t lclNumRows = rowMap.is_null () ?
416 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
417 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
418 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
419 std::invalid_argument, "numEntPerRow has length " <<
420 numEntPerRow.extent (0) << " != the local number of rows " <<
421 lclNumRows << " as specified by " "the input row Map.");
422
423 if (debug_) {
424 for (size_t r = 0; r < lclNumRows; ++r) {
425 const size_t curRowCount = numEntPerRow.view_host()(r);
426 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
427 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
428 std::invalid_argument, "numEntPerRow(" << r << ") "
429 "specifies an invalid number of entries "
430 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
431 }
432 }
433
434 resumeFill (params);
436 }
437
438
439 template <class LocalOrdinal, class GlobalOrdinal, class Node>
440 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
441 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
442 const Teuchos::RCP<const map_type>& colMap,
443 const Teuchos::ArrayView<const size_t>& numEntPerRow,
444 const Teuchos::RCP<Teuchos::ParameterList>& params) :
445 dist_object_type (rowMap)
446 , rowMap_ (rowMap)
447 , colMap_ (colMap)
449 {
450 const char tfecfFuncName[] =
451 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
452 staticAssertions ();
453
454 const size_t lclNumRows = rowMap.is_null () ?
455 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
456 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
457 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
458 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
459 << " != the local number of rows " << lclNumRows << " as specified by "
460 "the input row Map.");
461
462 if (debug_) {
463 for (size_t r = 0; r < lclNumRows; ++r) {
464 const size_t curRowCount = numEntPerRow[r];
465 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
466 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
467 std::invalid_argument, "numEntPerRow(" << r << ") "
468 "specifies an invalid number of entries "
469 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
470 }
471 }
472
473 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
474 // The latter is a const View, so we have to copy into a nonconst
475 // View first, then assign.
476 typedef decltype (k_numAllocPerRow_) out_view_type;
477 typedef typename out_view_type::non_const_type nc_view_type;
478 typedef Kokkos::View<const size_t*,
479 typename nc_view_type::array_layout,
480 Kokkos::HostSpace,
481 Kokkos::MemoryUnmanaged> in_view_type;
482 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
483 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
484 lclNumRows);
485 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
486 using exec_space = typename nc_view_type::execution_space;
487 Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
488 k_numAllocPerRow_ = numAllocPerRowOut;
489
490 resumeFill (params);
492 }
493
494
495 template <class LocalOrdinal, class GlobalOrdinal, class Node>
496 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
497 CrsGraph (CrsGraph<local_ordinal_type, global_ordinal_type, node_type>& originalGraph,
498 const Teuchos::RCP<const map_type>& rowMap,
499 const Teuchos::RCP<Teuchos::ParameterList>& params) :
500 dist_object_type (rowMap)
501 , rowMap_(rowMap)
502 , colMap_(originalGraph.colMap_)
504 , storageStatus_(originalGraph.storageStatus_)
505 , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
506 , indicesAreLocal_(originalGraph.indicesAreLocal_)
507 , indicesAreSorted_(originalGraph.indicesAreSorted_)
508 {
509 staticAssertions();
510
511 int numRows = rowMap->getLocalNumElements();
512 size_t numNonZeros = originalGraph.getRowPtrsPackedHost()(numRows);
513 auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
514
515
516 this->setRowPtrsUnpacked(Kokkos::subview(originalGraph.getRowPtrsUnpackedDevice(), rowsToUse));
517 this->setRowPtrsPacked(Kokkos::subview(originalGraph.getRowPtrsPackedDevice(), rowsToUse));
518
519 if (indicesAreLocal_) {
520 lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
521 lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
522 }
523 else {
524 gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
525 }
526
528 }
529
530 template <class LocalOrdinal, class GlobalOrdinal, class Node>
531 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
532 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
533 const Teuchos::RCP<const map_type>& colMap,
534 const typename local_graph_device_type::row_map_type& rowPointers,
535 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
536 const Teuchos::RCP<Teuchos::ParameterList>& params) :
537 dist_object_type (rowMap)
538 , rowMap_(rowMap)
539 , colMap_(colMap)
541 , storageStatus_(Details::STORAGE_1D_PACKED)
542 , indicesAreAllocated_(true)
543 , indicesAreLocal_(true)
544 {
545 staticAssertions ();
546 if (! params.is_null() && params->isParameter("sorted") &&
547 ! params->get<bool>("sorted")) {
548 indicesAreSorted_ = false;
549 }
550 else {
551 indicesAreSorted_ = true;
552 }
553 setAllIndices (rowPointers, columnIndices);
555 }
556
557 template <class LocalOrdinal, class GlobalOrdinal, class Node>
558 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
559 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
560 const Teuchos::RCP<const map_type>& colMap,
561 const Teuchos::ArrayRCP<size_t>& rowPointers,
562 const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
563 const Teuchos::RCP<Teuchos::ParameterList>& params) :
564 dist_object_type (rowMap)
565 , rowMap_ (rowMap)
566 , colMap_ (colMap)
568 , storageStatus_ (Details::STORAGE_1D_PACKED)
569 , indicesAreAllocated_ (true)
570 , indicesAreLocal_ (true)
571 {
572 staticAssertions ();
573 if (! params.is_null() && params->isParameter("sorted") &&
574 ! params->get<bool>("sorted")) {
575 indicesAreSorted_ = false;
576 }
577 else {
578 indicesAreSorted_ = true;
579 }
580 setAllIndices (rowPointers, columnIndices);
582 }
583
584 template <class LocalOrdinal, class GlobalOrdinal, class Node>
585 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
586 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
587 const Teuchos::RCP<const map_type>& colMap,
588 const local_graph_device_type& k_local_graph_,
589 const Teuchos::RCP<Teuchos::ParameterList>& params)
590 : CrsGraph (k_local_graph_,
591 rowMap,
592 colMap,
593 Teuchos::null,
594 Teuchos::null,
595 params)
596 {}
597
598 template <class LocalOrdinal, class GlobalOrdinal, class Node>
599 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
600 CrsGraph (const local_graph_device_type& k_local_graph_,
601 const Teuchos::RCP<const map_type>& rowMap,
602 const Teuchos::RCP<const map_type>& colMap,
603 const Teuchos::RCP<const map_type>& domainMap,
604 const Teuchos::RCP<const map_type>& rangeMap,
605 const Teuchos::RCP<Teuchos::ParameterList>& params)
606 : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
607 , rowMap_ (rowMap)
608 , colMap_ (colMap)
610 , storageStatus_ (Details::STORAGE_1D_PACKED)
611 , indicesAreAllocated_ (true)
612 , indicesAreLocal_ (true)
613 {
614 staticAssertions();
615 const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
616
617 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
618 colMap.is_null (), std::runtime_error,
619 ": The input column Map must be nonnull.");
620 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
621 k_local_graph_.numRows () != rowMap->getLocalNumElements (),
622 std::runtime_error,
623 ": The input row Map and the input local graph need to have the same "
624 "number of rows. The row Map claims " << rowMap->getLocalNumElements ()
625 << " row(s), but the local graph claims " << k_local_graph_.numRows ()
626 << " row(s).");
627
628 // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
629 // rowMap_->getLocalNumElements(), but it doesn't have to.
630 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
631 // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
632 // ": The input row Map and the input local graph need to have the same "
633 // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
634 // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
636 lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
637 ": cannot have 1D data structures allocated.");
638
639 if(! params.is_null() && params->isParameter("sorted") &&
640 ! params->get<bool>("sorted")) {
641 indicesAreSorted_ = false;
642 }
643 else {
644 indicesAreSorted_ = true;
645 }
646
647 setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
648 rangeMap .is_null() ? rowMap_ : rangeMap);
649 Teuchos::Array<int> remotePIDs (0); // unused output argument
650 this->makeImportExport (remotePIDs, false);
651
652 lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
654 this->setRowPtrs(k_local_graph_.row_map);
655
656 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
657
658 const bool callComputeGlobalConstants = params.get () == nullptr ||
659 params->get ("compute global constants", true);
660
661 if (callComputeGlobalConstants) {
662 this->computeGlobalConstants ();
663 }
664 this->fillComplete_ = true;
665 this->checkInternalState ();
666 }
667
668 template <class LocalOrdinal, class GlobalOrdinal, class Node>
669 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
670 CrsGraph (const local_graph_device_type& lclGraph,
671 const Teuchos::RCP<const map_type>& rowMap,
672 const Teuchos::RCP<const map_type>& colMap,
673 const Teuchos::RCP<const map_type>& domainMap,
674 const Teuchos::RCP<const map_type>& rangeMap,
675 const Teuchos::RCP<const import_type>& importer,
676 const Teuchos::RCP<const export_type>& exporter,
677 const Teuchos::RCP<Teuchos::ParameterList>& params) :
678 DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
679 rowMap_ (rowMap),
680 colMap_ (colMap),
681 rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
682 domainMap_ (domainMap.is_null () ? rowMap : domainMap),
683 importer_ (importer),
684 exporter_ (exporter),
686 storageStatus_ (Details::STORAGE_1D_PACKED),
687 indicesAreAllocated_ (true),
688 indicesAreLocal_ (true)
689 {
690 staticAssertions();
691 const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
692 "Map,Map,Map,Map,Import,Export,params): ";
693
694 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
695 (colMap.is_null (), std::runtime_error,
696 "The input column Map must be nonnull.");
697
698 lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
700 setRowPtrs(lclGraph.row_map);
701
702 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
703
704 if (! params.is_null() && params->isParameter("sorted") &&
705 ! params->get<bool>("sorted")) {
706 indicesAreSorted_ = false;
707 }
708 else {
709 indicesAreSorted_ = true;
710 }
711
712 const bool callComputeGlobalConstants =
713 params.get () == nullptr ||
714 params->get ("compute global constants", true);
715 if (callComputeGlobalConstants) {
716 this->computeGlobalConstants ();
717 }
718 fillComplete_ = true;
720 }
721
722 template <class LocalOrdinal, class GlobalOrdinal, class Node>
723 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
724 CrsGraph (const row_ptrs_device_view_type& rowPointers,
725 const local_inds_wdv_type& columnIndices,
726 const Teuchos::RCP<const map_type>& rowMap,
727 const Teuchos::RCP<const map_type>& colMap,
728 const Teuchos::RCP<const map_type>& domainMap,
729 const Teuchos::RCP<const map_type>& rangeMap,
730 const Teuchos::RCP<const import_type>& importer,
731 const Teuchos::RCP<const export_type>& exporter,
732 const Teuchos::RCP<Teuchos::ParameterList>& params) :
733 DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
734 rowMap_ (rowMap),
735 colMap_ (colMap),
736 rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
737 domainMap_ (domainMap.is_null () ? rowMap : domainMap),
738 importer_ (importer),
739 exporter_ (exporter),
741 storageStatus_ (Details::STORAGE_1D_PACKED),
742 indicesAreAllocated_ (true),
743 indicesAreLocal_ (true)
744 {
745 staticAssertions();
746 const char tfecfFuncName[] = "Tpetra::CrsGraph(row_ptrs_device_view_type,local_inds_wdv_type"
747 "Map,Map,Map,Map,Import,Export,params): ";
748
749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
750 (colMap.is_null (), std::runtime_error,
751 "The input column Map must be nonnull.");
752
753 lclIndsPacked_wdv = columnIndices;
755 setRowPtrs(rowPointers);
756
757 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
758
759 if (! params.is_null() && params->isParameter("sorted") &&
760 ! params->get<bool>("sorted")) {
761 indicesAreSorted_ = false;
762 }
763 else {
764 indicesAreSorted_ = true;
765 }
766
767 const bool callComputeGlobalConstants =
768 params.get () == nullptr ||
769 params->get ("compute global constants", true);
770 if (callComputeGlobalConstants) {
771 this->computeGlobalConstants ();
772 }
773 fillComplete_ = true;
775 }
776
777 template <class LocalOrdinal, class GlobalOrdinal, class Node>
778 Teuchos::RCP<const Teuchos::ParameterList>
780 getValidParameters () const
781 {
782 using Teuchos::RCP;
783 using Teuchos::ParameterList;
784 using Teuchos::parameterList;
785
786 RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
787
788 // Make a sublist for the Import.
789 RCP<ParameterList> importSublist = parameterList ("Import");
790
791 // FIXME (mfh 02 Apr 2012) We should really have the Import and
792 // Export objects fill in these lists. However, we don't want to
793 // create an Import or Export unless we need them. For now, we
794 // know that the Import and Export just pass the list directly to
795 // their Distributor, so we can create a Distributor here
796 // (Distributor's constructor is a lightweight operation) and have
797 // it fill in the list.
798
799 // Fill in Distributor default parameters by creating a
800 // Distributor and asking it to do the work.
801 Distributor distributor (rowMap_->getComm (), importSublist);
802 params->set ("Import", *importSublist, "How the Import performs communication.");
803
804 // Make a sublist for the Export. For now, it's a clone of the
805 // Import sublist. It's not a shallow copy, though, since we
806 // might like the Import to do communication differently than the
807 // Export.
808 params->set ("Export", *importSublist, "How the Export performs communication.");
809
810 return params;
811 }
812
813 template <class LocalOrdinal, class GlobalOrdinal, class Node>
814 void
816 setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
817 {
818 Teuchos::RCP<const Teuchos::ParameterList> validParams =
820 params->validateParametersAndSetDefaults (*validParams);
821 this->setMyParamList (params);
822 }
823
824 template <class LocalOrdinal, class GlobalOrdinal, class Node>
827 getGlobalNumRows () const
828 {
829 return rowMap_->getGlobalNumElements ();
830 }
831
832 template <class LocalOrdinal, class GlobalOrdinal, class Node>
835 getGlobalNumCols () const
836 {
837 const char tfecfFuncName[] = "getGlobalNumCols: ";
838 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
839 ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
840 "The graph does not have a domain Map. You may not call this method in "
841 "that case.");
842 return getDomainMap ()->getGlobalNumElements ();
843 }
844
845
846 template <class LocalOrdinal, class GlobalOrdinal, class Node>
847 size_t
849 getLocalNumRows () const
850 {
851 return this->rowMap_.is_null () ?
852 static_cast<size_t> (0) :
853 this->rowMap_->getLocalNumElements ();
854 }
855
856
857 template <class LocalOrdinal, class GlobalOrdinal, class Node>
858 size_t
860 getLocalNumCols () const
861 {
862 const char tfecfFuncName[] = "getLocalNumCols: ";
863 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
864 ! hasColMap (), std::runtime_error,
865 "The graph does not have a column Map. You may not call this method "
866 "unless the graph has a column Map. This requires either that a custom "
867 "column Map was given to the constructor, or that fillComplete() has "
868 "been called.");
869 return colMap_.is_null () ? static_cast<size_t> (0) :
870 colMap_->getLocalNumElements ();
871 }
872
873
874
875 template <class LocalOrdinal, class GlobalOrdinal, class Node>
876 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
882
883 template <class LocalOrdinal, class GlobalOrdinal, class Node>
884 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
890
891 template <class LocalOrdinal, class GlobalOrdinal, class Node>
892 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
898
899 template <class LocalOrdinal, class GlobalOrdinal, class Node>
900 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
906
907 template <class LocalOrdinal, class GlobalOrdinal, class Node>
908 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
914
915 template <class LocalOrdinal, class GlobalOrdinal, class Node>
916 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
922
923 template <class LocalOrdinal, class GlobalOrdinal, class Node>
924 bool
926 hasColMap () const
927 {
928 return ! colMap_.is_null ();
929 }
930
931 template <class LocalOrdinal, class GlobalOrdinal, class Node>
932 bool
934 isStorageOptimized () const
935 {
936 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
937 // getLocalNumRows() is zero?
938
939 const bool isOpt = indicesAreAllocated_ &&
940 k_numRowEntries_.extent (0) == 0 &&
941 getLocalNumRows () > 0;
942
943 return isOpt;
944 }
945
946
947 template <class LocalOrdinal, class GlobalOrdinal, class Node>
951 {
952 const char tfecfFuncName[] = "getGlobalNumEntries: ";
953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
954 (! this->haveGlobalConstants_, std::logic_error,
955 "The graph does not have global constants computed, "
956 "but the user has requested them.");
957
958 return globalNumEntries_;
959 }
960
961
962 template <class LocalOrdinal, class GlobalOrdinal, class Node>
963 size_t
965 getLocalNumEntries () const
966 {
967 const char tfecfFuncName[] = "getLocalNumEntries: ";
968 typedef LocalOrdinal LO;
969
970 if (this->indicesAreAllocated_) {
971 const LO lclNumRows = this->getLocalNumRows ();
972 if (lclNumRows == 0) {
973 return static_cast<size_t> (0);
974 }
975 else {
976 // Avoid the "*this capture" issue by creating a local Kokkos::View.
977 auto numEntPerRow = this->k_numRowEntries_;
978 const LO numNumEntPerRow = numEntPerRow.extent (0);
979 if (numNumEntPerRow == 0) {
980 if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
981 static_cast<LO> (lclNumRows + 1)) {
982 return static_cast<size_t> (0);
983 }
984 else {
985 // indices are allocated and k_numRowEntries_ is not allocated,
986 // so we have packed storage and the length of lclIndsPacked_wdv
987 // must be the number of local entries.
988 if(debug_) {
989 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
990 (this->getRowPtrsPackedHost()(lclNumRows) != lclIndsPacked_wdv.extent(0), std::logic_error,
991 "Final entry of packed host rowptrs doesn't match the length of lclIndsPacked");
992 }
993 return lclIndsPacked_wdv.extent(0);
994 }
995 }
996 else { // k_numRowEntries_ is populated
997 // k_numRowEntries_ is actually be a host View, so we run
998 // the sum in its native execution space. This also means
999 // that we can use explicit capture (which could perhaps
1000 // improve build time) instead of KOKKOS_LAMBDA, and avoid
1001 // any CUDA build issues with trying to run a __device__ -
1002 // only function on host.
1003 typedef typename num_row_entries_type::execution_space
1004 host_exec_space;
1005 typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
1006
1007 const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
1008 lclNumRows :
1009 numNumEntPerRow;
1010 size_t nodeNumEnt = 0;
1011 Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
1012 range_type (0, upperLoopBound),
1013 [=] (const LO& k, size_t& lclSum) {
1014 lclSum += numEntPerRow(k);
1015 }, nodeNumEnt);
1016 return nodeNumEnt;
1017 }
1018 }
1019 }
1020 else { // nothing allocated on this process, so no entries
1021 return static_cast<size_t> (0);
1022 }
1023 }
1024
1025 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1029 {
1030 const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1031 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1032 (! this->haveGlobalConstants_, std::logic_error,
1033 "The graph does not have global constants computed, "
1034 "but the user has requested them.");
1035
1037 }
1038
1039 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040 size_t
1046
1047 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048 bool
1050 isFillComplete () const
1051 {
1052 return fillComplete_;
1053 }
1054
1055 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056 bool
1058 isFillActive () const
1059 {
1060 return ! fillComplete_;
1061 }
1062
1063
1064 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1065 bool
1067 isLocallyIndexed () const
1068 {
1069 return indicesAreLocal_;
1070 }
1071
1072 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1073 bool
1075 isGloballyIndexed () const
1076 {
1077 return indicesAreGlobal_;
1078 }
1079
1080 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1081 size_t
1084 {
1085 typedef LocalOrdinal LO;
1086
1087 if (this->indicesAreAllocated_) {
1088 const LO lclNumRows = this->getLocalNumRows ();
1089 if (lclNumRows == 0) {
1090 return static_cast<size_t> (0);
1091 }
1092 else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1093 if (static_cast<LO> (this->getRowPtrsPackedDevice().extent (0)) <
1094 static_cast<LO> (lclNumRows + 1)) {
1095 return static_cast<size_t> (0);
1096 }
1097 else {
1098 if(this->isLocallyIndexed())
1099 return lclIndsPacked_wdv.extent(0);
1100 else
1101 return gblInds_wdv.extent(0);
1102 }
1103 }
1104 else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1105 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1106 if (rowPtrsUnpacked_host.extent (0) == 0) {
1107 return static_cast<size_t> (0);
1108 }
1109 else {
1110 if(this->isLocallyIndexed())
1111 return lclIndsUnpacked_wdv.extent(0);
1112 else
1113 return gblInds_wdv.extent(0);
1114 }
1115 }
1116 else {
1117 return static_cast<size_t> (0);
1118 }
1119 }
1120 else {
1121 return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1122 }
1123 }
1124
1125 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1126 Teuchos::RCP<const Teuchos::Comm<int> >
1128 getComm () const
1129 {
1130 return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1131 }
1132
1133 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1134 GlobalOrdinal
1136 getIndexBase () const
1137 {
1138 return rowMap_->getIndexBase ();
1139 }
1140
1141 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1142 bool
1144 indicesAreAllocated () const
1145 {
1146 return indicesAreAllocated_;
1147 }
1148
1149 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1150 bool
1156
1157 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1158 bool
1164
1165 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1166 void
1169 {
1170 // FIXME (mfh 07 May 2013) How do we know that the change
1171 // introduced a redundancy, or even that it invalidated the sorted
1172 // order of indices? CrsGraph has always made this conservative
1173 // guess. It could be a bit costly to check at insertion time,
1174 // though.
1175 indicesAreSorted_ = false;
1176 noRedundancies_ = false;
1177
1178 // We've modified the graph, so we'll have to recompute local
1179 // constants like the number of diagonal entries on this process.
1180 haveLocalConstants_ = false;
1181 }
1182
1183 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1184 void
1186 allocateIndices (const ELocalGlobal lg, const bool verbose)
1187 {
1189 using Teuchos::arcp;
1190 using Teuchos::Array;
1191 using Teuchos::ArrayRCP;
1192 using std::endl;
1193 typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1194 typedef typename local_graph_device_type::row_map_type::non_const_type
1195 non_const_row_map_type;
1196 const char tfecfFuncName[] = "allocateIndices: ";
1197 const char suffix[] =
1198 " Please report this bug to the Tpetra developers.";
1199 ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1200
1201 std::unique_ptr<std::string> prefix;
1202 if (verbose) {
1203 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1204 std::ostringstream os;
1205 os << *prefix << "Start: lg="
1206 << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1207 << ", numRows: " << this->getLocalNumRows() << endl;
1208 std::cerr << os.str();
1209 }
1210
1211 // This is a protected function, only callable by us. If it was
1212 // called incorrectly, it is our fault. That's why the tests
1213 // below throw std::logic_error instead of std::invalid_argument.
1214 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1215 (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1216 ": The graph is locally indexed, but Tpetra code is calling "
1217 "this method with lg=GlobalIndices." << suffix);
1218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1219 (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1220 ": The graph is globally indexed, but Tpetra code is calling "
1221 "this method with lg=LocalIndices." << suffix);
1222 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1223 (indicesAreAllocated (), std::logic_error, ": The graph's "
1224 "indices are already allocated, but Tpetra is calling "
1225 "allocateIndices again." << suffix);
1226 const size_t numRows = this->getLocalNumRows ();
1227
1228 //
1229 // STATIC ALLOCATION PROFILE
1230 //
1231 size_type numInds = 0;
1232 {
1233 if (verbose) {
1234 std::ostringstream os;
1235 os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1236 std::cerr << os.str();
1237 }
1238 non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1239
1240 if (this->k_numAllocPerRow_.extent (0) != 0) {
1241 // It's OK to throw std::invalid_argument here, because we
1242 // haven't incurred any side effects yet. Throwing that
1243 // exception (and not, say, std::logic_error) implies that the
1244 // instance can recover.
1245 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1246 (this->k_numAllocPerRow_.extent (0) != numRows,
1247 std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1248 "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1249 << ", but its length != numRows = " << numRows << ".");
1250
1251 // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1252 // we want to compute here) lives on device. That's OK;
1253 // computeOffsetsFromCounts can handle this case.
1255
1256 // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1257 // doesn't attempt to check its input for "invalid" flag
1258 // values. For now, we omit that feature of the sequential
1259 // code disabled below.
1260 numInds = computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1261 }
1262 else {
1263 // It's OK to throw std::invalid_argument here, because we
1264 // haven't incurred any side effects yet. Throwing that
1265 // exception (and not, say, std::logic_error) implies that the
1266 // instance can recover.
1267 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1268 (this->numAllocForAllRows_ ==
1269 Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1270 std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1271 "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1272 Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1273
1275 numInds = computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1276 }
1277 // "Commit" the resulting row offsets.
1278 setRowPtrsUnpacked(k_rowPtrs);
1279 }
1280 if(debug_) {
1281 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1282 (numInds != size_type(this->getRowPtrsUnpackedHost()(numRows)), std::logic_error,
1283 ": Number of indices produced by computeOffsetsFrom[Constant]Counts "
1284 "does not match final entry of rowptrs unpacked");
1285 }
1286
1287
1288 if (lg == LocalIndices) {
1289 if (verbose) {
1290 std::ostringstream os;
1291 os << *prefix << "Allocate local column indices "
1292 "lclIndsUnpacked_wdv: " << numInds << endl;
1293 std::cerr << os.str();
1294 }
1295 lclIndsUnpacked_wdv = local_inds_wdv_type (
1296 local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1297 }
1298 else {
1299 if (verbose) {
1300 std::ostringstream os;
1301 os << *prefix << "Allocate global column indices "
1302 "gblInds_wdv: " << numInds << endl;
1303 std::cerr << os.str();
1304 }
1305 gblInds_wdv = global_inds_wdv_type (
1306 global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1307 }
1308 storageStatus_ = Details::STORAGE_1D_UNPACKED;
1309
1310 this->indicesAreLocal_ = (lg == LocalIndices);
1311 this->indicesAreGlobal_ = (lg == GlobalIndices);
1312
1313 if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1314 using Kokkos::ViewAllocateWithoutInitializing;
1315 const char label[] = "Tpetra::CrsGraph::numRowEntries";
1316 if (verbose) {
1317 std::ostringstream os;
1318 os << *prefix << "Allocate k_numRowEntries_: " << numRows
1319 << endl;
1320 std::cerr << os.str();
1321 }
1322 num_row_entries_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1323 // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1324 Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1325 Kokkos::fence("CrsGraph::allocateIndices"); // TODO: Need to understand downstream failure points and move this fence.
1326 this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1327 }
1328
1329 // Once indices are allocated, CrsGraph needs to free this information.
1330 this->numAllocForAllRows_ = 0;
1331 this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1332 this->indicesAreAllocated_ = true;
1333
1334 try {
1335 this->checkInternalState ();
1336 }
1337 catch (std::logic_error& e) {
1338 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1339 (true, std::logic_error, "At end of allocateIndices, "
1340 "checkInternalState threw std::logic_error: "
1341 << e.what ());
1342 }
1343 catch (std::exception& e) {
1344 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1345 (true, std::runtime_error, "At end of allocateIndices, "
1346 "checkInternalState threw std::exception: "
1347 << e.what ());
1348 }
1349 catch (...) {
1350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1351 (true, std::runtime_error, "At end of allocateIndices, "
1352 "checkInternalState threw an exception "
1353 "not a subclass of std::exception.");
1354 }
1355
1356 if (verbose) {
1357 std::ostringstream os;
1358 os << *prefix << "Done" << endl;
1359 std::cerr << os.str();
1360 }
1361 }
1362
1363 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1365 local_inds_dualv_type::t_host::const_type
1367 getLocalIndsViewHost (const RowInfo& rowinfo) const
1368 {
1369 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1370 return typename local_inds_dualv_type::t_host::const_type ();
1371 else
1372 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1373 rowinfo.allocSize,
1374 Access::ReadOnly);
1375 }
1376
1377 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1379 local_inds_dualv_type::t_host
1381 getLocalIndsViewHostNonConst (const RowInfo& rowinfo)
1382 {
1383 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1384 return typename local_inds_dualv_type::t_host ();
1385 else
1386 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1387 rowinfo.allocSize,
1388 Access::ReadWrite);
1389 }
1390
1391 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1393 global_inds_dualv_type::t_host::const_type
1395 getGlobalIndsViewHost (const RowInfo& rowinfo) const
1396 {
1397 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1398 return typename global_inds_dualv_type::t_host::const_type ();
1399 else
1400 return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1401 rowinfo.allocSize,
1402 Access::ReadOnly);
1403 }
1404
1405 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1407 local_inds_dualv_type::t_dev::const_type
1409 getLocalIndsViewDevice (const RowInfo& rowinfo) const
1410 {
1411 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1412 return typename local_inds_dualv_type::t_dev::const_type ();
1413 else
1414 return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1415 rowinfo.allocSize,
1416 Access::ReadOnly);
1417 }
1418
1419 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1421 global_inds_dualv_type::t_dev::const_type
1423 getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1424 {
1425 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1426 return typename global_inds_dualv_type::t_dev::const_type ();
1427 else
1428 return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1429 rowinfo.allocSize,
1430 Access::ReadOnly);
1431 }
1432
1433
1434 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1435 RowInfo
1437 getRowInfo (const LocalOrdinal myRow) const
1438 {
1439 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1440 RowInfo ret;
1441 if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1442 ret.localRow = STINV;
1443 ret.allocSize = 0;
1444 ret.numEntries = 0;
1445 ret.offset1D = STINV;
1446 return ret;
1447 }
1448
1449 ret.localRow = static_cast<size_t> (myRow);
1450 if (this->indicesAreAllocated ()) {
1451 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1452 // Offsets tell us the allocation size in this case.
1453 if (rowPtrsUnpacked_host.extent (0) == 0) {
1454 ret.offset1D = 0;
1455 ret.allocSize = 0;
1456 }
1457 else {
1458 ret.offset1D = rowPtrsUnpacked_host(myRow);
1459 ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1460 }
1461
1462 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1463 ret.allocSize :
1464 this->k_numRowEntries_(myRow);
1465 }
1466 else { // haven't performed allocation yet; probably won't hit this code
1467 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1468 // allocate, rather than doing lazy allocation at first insert.
1469 // This will make k_numAllocPerRow_ obsolete.
1470 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1471 this->k_numAllocPerRow_(myRow) : // this is a host View
1472 this->numAllocForAllRows_;
1473 ret.numEntries = 0;
1474 ret.offset1D = STINV;
1475 }
1476
1477 return ret;
1478 }
1479
1480
1481 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1482 RowInfo
1484 getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1485 {
1486 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1487 RowInfo ret;
1488 if (this->rowMap_.is_null ()) {
1489 ret.localRow = STINV;
1490 ret.allocSize = 0;
1491 ret.numEntries = 0;
1492 ret.offset1D = STINV;
1493 return ret;
1494 }
1495 const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1496 if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1497 ret.localRow = STINV;
1498 ret.allocSize = 0;
1499 ret.numEntries = 0;
1500 ret.offset1D = STINV;
1501 return ret;
1502 }
1503
1504 ret.localRow = static_cast<size_t> (myRow);
1505 if (this->indicesAreAllocated ()) {
1506 // graph data structures have the info that we need
1507 //
1508 // if static graph, offsets tell us the allocation size
1509 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
1510 if (rowPtrsUnpacked_host.extent (0) == 0) {
1511 ret.offset1D = 0;
1512 ret.allocSize = 0;
1513 }
1514 else {
1515 ret.offset1D = rowPtrsUnpacked_host(myRow);
1516 ret.allocSize = rowPtrsUnpacked_host(myRow+1) - rowPtrsUnpacked_host(myRow);
1517 }
1518
1519 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1520 ret.allocSize :
1521 this->k_numRowEntries_(myRow);
1522 }
1523 else { // haven't performed allocation yet; probably won't hit this code
1524 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1525 // allocate, rather than doing lazy allocation at first insert.
1526 // This will make k_numAllocPerRow_ obsolete.
1527 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1528 this->k_numAllocPerRow_(myRow) : // this is a host View
1529 this->numAllocForAllRows_;
1530 ret.numEntries = 0;
1531 ret.offset1D = STINV;
1532 }
1533
1534 return ret;
1535 }
1536
1537
1538 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1539 void
1541 staticAssertions () const
1542 {
1543 using Teuchos::OrdinalTraits;
1544 typedef LocalOrdinal LO;
1545 typedef GlobalOrdinal GO;
1546 typedef global_size_t GST;
1547
1548 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1549 // This is so that we can store local indices in the memory
1550 // formerly occupied by global indices.
1551 static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1552 "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1553 // Assumption: max(size_t) >= max(LocalOrdinal)
1554 // This is so that we can represent any LocalOrdinal as a size_t.
1555 static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1556 "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1557 static_assert (sizeof(GST) >= sizeof(size_t),
1558 "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1559
1560 // FIXME (mfh 30 Sep 2015) We're not using
1561 // Teuchos::CompileTimeAssert any more. Can we do these checks
1562 // with static_assert?
1563
1564 // can't call max() with CompileTimeAssert, because it isn't a
1565 // constant expression; will need to make this a runtime check
1566 const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1567 "given template arguments: size assumptions are not valid.";
1568 TEUCHOS_TEST_FOR_EXCEPTION(
1569 static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1570 std::runtime_error, msg);
1571 TEUCHOS_TEST_FOR_EXCEPTION(
1572 static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1573 std::runtime_error, msg);
1574 TEUCHOS_TEST_FOR_EXCEPTION(
1575 static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1576 std::runtime_error, msg);
1577 TEUCHOS_TEST_FOR_EXCEPTION(
1578 Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1579 std::runtime_error, msg);
1580 }
1581
1582
1583 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1584 size_t
1586 insertIndices (RowInfo& rowinfo,
1587 const SLocalGlobalViews &newInds,
1588 const ELocalGlobal lg,
1589 const ELocalGlobal I)
1590 {
1591 using Teuchos::ArrayView;
1592 typedef LocalOrdinal LO;
1593 typedef GlobalOrdinal GO;
1594 const char tfecfFuncName[] = "insertIndices: ";
1595
1596 size_t oldNumEnt = 0;
1597 if (debug_) {
1598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1599 (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1600 "lg must be either GlobalIndices or LocalIndices.");
1601 oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1602 }
1603
1604 size_t numNewInds = 0;
1605 if (lg == GlobalIndices) { // input indices are global
1606 ArrayView<const GO> new_ginds = newInds.ginds;
1607 numNewInds = new_ginds.size();
1608 if (I == GlobalIndices) { // store global indices
1609 auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1610 if (debug_) {
1611 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1612 (static_cast<size_t> (gind_view.size ()) <
1613 rowinfo.numEntries + numNewInds, std::logic_error,
1614 "gind_view.size() = " << gind_view.size ()
1615 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1616 << ") + numNewInds (= " << numNewInds << ").");
1617 }
1618 GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1619 + rowinfo.numEntries;
1620 for (size_t k = 0; k < numNewInds; ++k) {
1621 gblColInds_out[k] = new_ginds[k];
1622 }
1623 }
1624 else if (I == LocalIndices) { // store local indices
1625 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1626 if (debug_) {
1627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1628 (static_cast<size_t> (lind_view.size ()) <
1629 rowinfo.numEntries + numNewInds, std::logic_error,
1630 "lind_view.size() = " << lind_view.size ()
1631 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1632 << ") + numNewInds (= " << numNewInds << ").");
1633 }
1634 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1635 + rowinfo.numEntries;
1636 for (size_t k = 0; k < numNewInds; ++k) {
1637 lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1638 }
1639 }
1640 }
1641 else if (lg == LocalIndices) { // input indices are local
1642 ArrayView<const LO> new_linds = newInds.linds;
1643 numNewInds = new_linds.size();
1644 if (I == LocalIndices) { // store local indices
1645 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1646 if (debug_) {
1647 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1648 (static_cast<size_t> (lind_view.size ()) <
1649 rowinfo.numEntries + numNewInds, std::logic_error,
1650 "lind_view.size() = " << lind_view.size ()
1651 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1652 << ") + numNewInds (= " << numNewInds << ").");
1653 }
1654 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1655 + rowinfo.numEntries;
1656 for (size_t k = 0; k < numNewInds; ++k) {
1657 lclColInds_out[k] = new_linds[k];
1658 }
1659 }
1660 else if (I == GlobalIndices) {
1661 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1662 (true, std::logic_error, "The case where the input indices are local "
1663 "and the indices to write are global (lg=LocalIndices, I="
1664 "GlobalIndices) is not implemented, because it does not make sense."
1665 << std::endl << "If you have correct local column indices, that "
1666 "means the graph has a column Map. In that case, you should be "
1667 "storing local indices.");
1668 }
1669 }
1670
1671 rowinfo.numEntries += numNewInds;
1672 this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1673 this->setLocallyModified ();
1674
1675 if (debug_) {
1676 const size_t chkNewNumEnt =
1677 this->getNumEntriesInLocalRow (rowinfo.localRow);
1678 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1679 (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1680 "chkNewNumEnt = " << chkNewNumEnt
1681 << " != oldNumEnt (= " << oldNumEnt
1682 << ") + numNewInds (= " << numNewInds << ").");
1683 }
1684
1685 return numNewInds;
1686 }
1687
1688 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1689 size_t
1691 insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1692 const GlobalOrdinal inputGblColInds[],
1693 const size_t numInputInds)
1694 {
1695 return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1696 inputGblColInds, numInputInds);
1697 }
1698
1699 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1700 size_t
1702 insertGlobalIndicesImpl (const RowInfo& rowInfo,
1703 const GlobalOrdinal inputGblColInds[],
1704 const size_t numInputInds,
1705 std::function<void(const size_t, const size_t, const size_t)> fun)
1706 {
1708 using Kokkos::View;
1709 using Kokkos::subview;
1710 using Kokkos::MemoryUnmanaged;
1711 using Teuchos::ArrayView;
1712 using LO = LocalOrdinal;
1713 using GO = GlobalOrdinal;
1714 const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1715 const LO lclRow = static_cast<LO> (rowInfo.localRow);
1716
1717 auto numEntries = rowInfo.numEntries;
1718 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1719 inp_view_type inputInds(inputGblColInds, numInputInds);
1720 size_t numInserted;
1721 {
1722 auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1723 numInserted = Details::insertCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1724 gblIndsHostView,
1725 numEntries, inputInds, fun);
1726 }
1727
1728 const bool insertFailed =
1729 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1730 if(insertFailed) {
1731 constexpr size_t ONE (1);
1732 const int myRank = this->getComm()->getRank();
1733 std::ostringstream os;
1734
1735 os << "Proc " << myRank << ": Not enough capacity to insert "
1736 << numInputInds
1737 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1738 << " into local row " << lclRow << ", which currently has "
1739 << rowInfo.numEntries
1740 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1741 << " and total allocation size " << rowInfo.allocSize
1742 << ". ";
1743 const size_t maxNumToPrint =
1745 ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1746 numInputInds);
1747 verbosePrintArray(os, inputGblColIndsView, "Input global "
1748 "column indices", maxNumToPrint);
1749 os << ", ";
1750 auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1751 ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1752 rowInfo.numEntries);
1753 verbosePrintArray(os, curGblColIndsView, "Current global "
1754 "column indices", maxNumToPrint);
1755 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1756 (true, std::runtime_error, os.str());
1757 }
1758
1759 this->k_numRowEntries_(lclRow) += numInserted;
1760
1761 this->setLocallyModified();
1762 return numInserted;
1763 }
1764
1765
1766 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1767 void
1769 insertLocalIndicesImpl (const LocalOrdinal myRow,
1770 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1771 std::function<void(const size_t, const size_t, const size_t)> fun)
1772 {
1773 using Kokkos::MemoryUnmanaged;
1774 using Kokkos::subview;
1775 using Kokkos::View;
1776 using LO = LocalOrdinal;
1777 const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1778
1779 const RowInfo rowInfo = this->getRowInfo(myRow);
1780
1781 size_t numNewInds = 0;
1782 size_t newNumEntries = 0;
1783
1784 auto numEntries = rowInfo.numEntries;
1785 // Note: Teuchos::ArrayViews are in HostSpace
1786 using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1787 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1788 size_t numInserted = 0;
1789 {
1790 auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1791 numInserted = Details::insertCrsIndices(myRow, this->getRowPtrsUnpackedHost(), lclInds,
1792 numEntries, inputInds, fun);
1793 }
1794
1795 const bool insertFailed =
1796 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1797 if(insertFailed) {
1798 constexpr size_t ONE (1);
1799 const size_t numInputInds(indices.size());
1800 const int myRank = this->getComm()->getRank();
1801 std::ostringstream os;
1802 os << "On MPI Process " << myRank << ": Not enough capacity to "
1803 "insert " << numInputInds
1804 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1805 << " into local row " << myRow << ", which currently has "
1806 << rowInfo.numEntries
1807 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1808 << " and total allocation size " << rowInfo.allocSize << ".";
1809 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1810 (true, std::runtime_error, os.str());
1811 }
1812 numNewInds = numInserted;
1813 newNumEntries = rowInfo.numEntries + numNewInds;
1814
1815 this->k_numRowEntries_(myRow) += numNewInds;
1816 this->setLocallyModified ();
1817
1818 if (debug_) {
1819 const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1820 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1821 (chkNewNumEntries != newNumEntries, std::logic_error,
1822 "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1823 << " != newNumEntries = " << newNumEntries
1824 << ". Please report this bug to the Tpetra developers.");
1825 }
1826 }
1827
1828 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1829 size_t
1831 findGlobalIndices(const RowInfo& rowInfo,
1832 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1833 std::function<void(const size_t, const size_t, const size_t)> fun) const
1834 {
1835 using GO = GlobalOrdinal;
1836 using Kokkos::View;
1837 using Kokkos::MemoryUnmanaged;
1838 auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1839
1840 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1841 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1842
1843 size_t numFound = 0;
1844 LocalOrdinal lclRow = rowInfo.localRow;
1845 if (this->isLocallyIndexed())
1846 {
1847 if (this->colMap_.is_null())
1848 return invalidCount;
1849 const auto& colMap = *(this->colMap_);
1850 auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1851 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1852 rowInfo.numEntries,
1853 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1854 }
1855 else if (this->isGloballyIndexed())
1856 {
1857 numFound = Details::findCrsIndices(lclRow, this->getRowPtrsUnpackedHost(),
1858 rowInfo.numEntries,
1859 gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1860 }
1861 return numFound;
1862 }
1863
1864
1865 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1866 size_t
1868 sortAndMergeRowIndices (const RowInfo& rowInfo,
1869 const bool sorted,
1870 const bool merged)
1871 {
1872 const size_t origNumEnt = rowInfo.numEntries;
1873 if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1874 origNumEnt != 0) {
1875 auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1876
1877 LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1878 if (! sorted) {
1879 std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1880 }
1881
1882 if (! merged) {
1883 LocalOrdinal* const beg = lclColIndsRaw;
1884 LocalOrdinal* const end = beg + rowInfo.numEntries;
1885 LocalOrdinal* const newend = std::unique (beg, end);
1886 const size_t newNumEnt = newend - beg;
1887
1888 // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1889 this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1890 return origNumEnt - newNumEnt; // the number of duplicates in the row
1891 }
1892 else {
1893 return static_cast<size_t> (0); // assume no duplicates
1894 }
1895 }
1896 else {
1897 return static_cast<size_t> (0); // no entries in the row
1898 }
1899 }
1900
1901
1902 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1903 void
1905 setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1906 const Teuchos::RCP<const map_type>& rangeMap)
1907 {
1908 // simple pointer comparison for equality
1909 if (domainMap_ != domainMap) {
1910 domainMap_ = domainMap;
1911 importer_ = Teuchos::null;
1912 }
1913 if (rangeMap_ != rangeMap) {
1914 rangeMap_ = rangeMap;
1915 exporter_ = Teuchos::null;
1916 }
1917 }
1918
1919
1920 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1921 void
1924 {
1925 const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1926
1927 globalNumEntries_ = INV;
1928 globalMaxNumRowEntries_ = INV;
1929 haveGlobalConstants_ = false;
1930 }
1931
1932
1933 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1934 void
1936 checkInternalState () const
1937 {
1938 if (debug_) {
1939 using std::endl;
1940 const char tfecfFuncName[] = "checkInternalState: ";
1941 const char suffix[] = " Please report this bug to the Tpetra developers.";
1942
1943 std::unique_ptr<std::string> prefix;
1944 if (verbose_) {
1945 prefix = this->createPrefix("CrsGraph", "checkInternalState");
1946 std::ostringstream os;
1947 os << *prefix << "Start" << endl;
1948 std::cerr << os.str();
1949 }
1950
1951 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1952 //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1953 // check the internal state of this data structure
1954 // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1955 // always remains in a valid state
1956
1957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1958 (this->rowMap_.is_null (), std::logic_error,
1959 "Row Map is null." << suffix);
1960 // This may access the row Map, so we need to check first (above)
1961 // whether the row Map is null.
1962 const LocalOrdinal lclNumRows =
1963 static_cast<LocalOrdinal> (this->getLocalNumRows ());
1964
1965 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1966 (this->isFillActive () == this->isFillComplete (), std::logic_error,
1967 "Graph cannot be both fill active and fill complete." << suffix);
1968 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1969 (this->isFillComplete () &&
1970 (this->colMap_.is_null () ||
1971 this->rangeMap_.is_null () ||
1972 this->domainMap_.is_null ()),
1973 std::logic_error,
1974 "Graph is full complete, but at least one of {column, range, domain} "
1975 "Map is null." << suffix);
1976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1977 (this->isStorageOptimized () && ! this->indicesAreAllocated (),
1978 std::logic_error, "Storage is optimized, but indices are not "
1979 "allocated, not even trivially." << suffix);
1980
1981 size_t nodeAllocSize = 0;
1982 try {
1983 nodeAllocSize = this->getLocalAllocationSize ();
1984 }
1985 catch (std::logic_error& e) {
1986 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1987 (true, std::runtime_error, "getLocalAllocationSize threw "
1988 "std::logic_error: " << e.what ());
1989 }
1990 catch (std::exception& e) {
1991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1992 (true, std::runtime_error, "getLocalAllocationSize threw an "
1993 "std::exception: " << e.what ());
1994 }
1995 catch (...) {
1996 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1997 (true, std::runtime_error, "getLocalAllocationSize threw an exception "
1998 "not a subclass of std::exception.");
1999 }
2000
2001 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2002 (this->isStorageOptimized () &&
2003 nodeAllocSize != this->getLocalNumEntries (),
2004 std::logic_error, "Storage is optimized, but "
2005 "this->getLocalAllocationSize() = " << nodeAllocSize
2006 << " != this->getLocalNumEntries() = " << this->getLocalNumEntries ()
2007 << "." << suffix);
2008 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2009 (! this->haveGlobalConstants_ &&
2010 (this->globalNumEntries_ != GSTI ||
2011 this->globalMaxNumRowEntries_ != GSTI),
2012 std::logic_error, "Graph claims not to have global constants, but "
2013 "some of the global constants are not marked as invalid." << suffix);
2014 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2015 (this->haveGlobalConstants_ &&
2016 (this->globalNumEntries_ == GSTI ||
2017 this->globalMaxNumRowEntries_ == GSTI),
2018 std::logic_error, "Graph claims to have global constants, but "
2019 "some of them are marked as invalid." << suffix);
2020 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2021 (this->haveGlobalConstants_ &&
2022 (this->globalNumEntries_ < this->getLocalNumEntries () ||
2023 this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2024 std::logic_error, "Graph claims to have global constants, and "
2025 "all of the values of the global constants are valid, but "
2026 "some of the local constants are greater than "
2027 "their corresponding global constants." << suffix);
2028 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2029 (this->indicesAreAllocated () &&
2030 (this->numAllocForAllRows_ != 0 ||
2031 this->k_numAllocPerRow_.extent (0) != 0),
2032 std::logic_error, "The graph claims that its indices are allocated, but "
2033 "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2034 "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2035 "the graph is supposed to release its \"allocation specifications\" "
2036 "when it allocates its indices." << suffix);
2037 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
2038 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
2039 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2040 (rowPtrsUnpacked_host.extent(0) != rowPtrsUnpacked_dev.extent(0),
2041 std::logic_error, "The host and device views of k_rowPtrs_ have "
2042 "different sizes; rowPtrsUnpacked_host_ has size "
2043 << rowPtrsUnpacked_host.extent(0)
2044 << ", but rowPtrsUnpacked_dev_ has size "
2045 << rowPtrsUnpacked_dev.extent(0)
2046 << "." << suffix);
2047 if (isGloballyIndexed() && rowPtrsUnpacked_host.extent(0) != 0) {
2048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2049 (size_t(rowPtrsUnpacked_host.extent(0)) != size_t(lclNumRows + 1),
2050 std::logic_error, "The graph is globally indexed and "
2051 "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host.extent(0)
2052 << ", but that size does not equal lclNumRows+1 = "
2053 << (lclNumRows+1) << "." << suffix);
2054 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2055 (rowPtrsUnpacked_host(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2056 std::logic_error, "The graph is globally indexed and "
2057 "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host.extent(0)
2058 << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2059 << rowPtrsUnpacked_host(lclNumRows)
2060 << " != gblInds_wdv.extent(0)="
2061 << gblInds_wdv.extent(0) << "." << suffix);
2062 }
2063 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2064 (this->isLocallyIndexed () &&
2065 rowPtrsUnpacked_host.extent (0) != 0 &&
2066 (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2067 static_cast<size_t> (lclNumRows + 1) ||
2068 rowPtrsUnpacked_host(lclNumRows) !=
2069 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2070 std::logic_error, "If k_rowPtrs_ has nonzero size and "
2071 "the graph is locally indexed, then "
2072 "k_rowPtrs_ must have N+1 rows, and "
2073 "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2074
2075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2076 (this->indicesAreAllocated () &&
2077 nodeAllocSize > 0 &&
2078 this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2079 this->gblInds_wdv.extent (0) == 0,
2080 std::logic_error, "Graph is allocated nontrivially, but "
2081 "but 1-D allocations are not present." << suffix);
2082
2083 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2084 (! this->indicesAreAllocated () &&
2085 ((rowPtrsUnpacked_host.extent (0) != 0 ||
2086 this->k_numRowEntries_.extent (0) != 0) ||
2087 this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2088 this->gblInds_wdv.extent (0) != 0),
2089 std::logic_error, "If indices are not allocated, "
2090 "then none of the buffers should be." << suffix);
2091 // indices may be local or global only if they are allocated
2092 // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2093 // indicesAreGlobal_)
2094 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2095 ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2096 ! this->indicesAreAllocated_,
2097 std::logic_error, "Indices may be local or global only if they are "
2098 "allocated." << suffix);
2099 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2100 (this->indicesAreLocal_ && this->indicesAreGlobal_,
2101 std::logic_error, "Indices may not be both local and global." << suffix);
2102 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2103 (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2104 std::logic_error, "Indices are local, but "
2105 "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2106 << ") != 0. In other words, if indices are local, then "
2107 "allocations of global indices should not be present."
2108 << suffix);
2109 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2110 (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2111 std::logic_error, "Indices are global, but "
2112 "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2113 << ") != 0. In other words, if indices are global, "
2114 "then allocations for local indices should not be present."
2115 << suffix);
2116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117 (indicesAreLocal_ && nodeAllocSize > 0 &&
2118 lclIndsUnpacked_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2119 std::logic_error, "Indices are local and "
2120 "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2121 "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
2122 << getLocalNumRows () << " > 0." << suffix);
2123 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2124 (indicesAreGlobal_ && nodeAllocSize > 0 &&
2125 gblInds_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2126 std::logic_error, "Indices are global and "
2127 "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2128 "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
2129 << getLocalNumRows () << " > 0." << suffix);
2130 // check the actual allocations
2131 if (this->indicesAreAllocated () &&
2132 rowPtrsUnpacked_host.extent (0) != 0) {
2133 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2134 (static_cast<size_t> (rowPtrsUnpacked_host.extent (0)) !=
2135 this->getLocalNumRows () + 1,
2136 std::logic_error, "Indices are allocated and "
2137 "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2138 << rowPtrsUnpacked_host.extent (0) << " != getLocalNumRows()+1 = "
2139 << (this->getLocalNumRows () + 1) << "." << suffix);
2140 const size_t actualNumAllocated =
2141 rowPtrsUnpacked_host(this->getLocalNumRows());
2142 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2143 (this->isLocallyIndexed () &&
2144 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2145 std::logic_error, "Graph is locally indexed, indices are "
2146 "are allocated, and k_rowPtrs_ has nonzero length, but "
2147 "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2148 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2149 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2150 (this->isGloballyIndexed () &&
2151 static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2152 std::logic_error, "Graph is globally indexed, indices "
2153 "are allocated, and k_rowPtrs_ has nonzero length, but "
2154 "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2155 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2156 }
2157
2158 if (verbose_) {
2159 std::ostringstream os;
2160 os << *prefix << "Done" << endl;
2161 std::cerr << os.str();
2162 }
2163 }
2164 }
2165
2166
2167 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2168 size_t
2170 getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2171 {
2172 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2173 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2174 return Teuchos::OrdinalTraits<size_t>::invalid ();
2175 }
2176 else {
2177 return rowInfo.numEntries;
2178 }
2179 }
2180
2181
2182 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2183 size_t
2185 getNumEntriesInLocalRow (LocalOrdinal localRow) const
2186 {
2187 const RowInfo rowInfo = this->getRowInfo (localRow);
2188 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2189 return Teuchos::OrdinalTraits<size_t>::invalid ();
2190 }
2191 else {
2192 return rowInfo.numEntries;
2193 }
2194 }
2195
2196
2197 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2198 size_t
2200 getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2201 {
2202 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2203 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2204 return Teuchos::OrdinalTraits<size_t>::invalid ();
2205 }
2206 else {
2207 return rowInfo.allocSize;
2208 }
2209 }
2210
2211
2212 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2213 size_t
2215 getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2216 {
2217 const RowInfo rowInfo = this->getRowInfo (localRow);
2218 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2219 return Teuchos::OrdinalTraits<size_t>::invalid ();
2220 }
2221 else {
2222 return rowInfo.allocSize;
2223 }
2224 }
2225
2226
2227 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2228 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2234
2235 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2236 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2242
2243
2244 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2245 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2247 getLocalIndicesHost () const
2248 {
2249 return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2250 }
2251
2252 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2256 {
2257 return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2258 }
2259
2260 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2261 void
2263 getLocalRowCopy (LocalOrdinal localRow,
2264 nonconst_local_inds_host_view_type & indices,
2265 size_t& numEntries) const
2266 {
2267 using Teuchos::ArrayView;
2268 const char tfecfFuncName[] = "getLocalRowCopy: ";
2269
2270 TEUCHOS_TEST_FOR_EXCEPTION(
2271 isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2272 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2273 "does not have a column Map yet. That means we don't have local indices "
2274 "for columns yet, so it doesn't make sense to call this method. If the "
2275 "graph doesn't have a column Map yet, you should call fillComplete on "
2276 "it first.");
2277
2278 // This does the right thing (reports an empty row) if the input
2279 // row is invalid.
2280 const RowInfo rowinfo = this->getRowInfo (localRow);
2281 // No side effects on error.
2282 const size_t theNumEntries = rowinfo.numEntries;
2283 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2284 (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2285 "Specified storage (size==" << indices.size () << ") does not suffice "
2286 "to hold all " << theNumEntries << " entry/ies for this row.");
2287 numEntries = theNumEntries;
2288
2289 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2290 if (isLocallyIndexed ()) {
2291 auto lclInds = getLocalIndsViewHost(rowinfo);
2292 for (size_t j = 0; j < theNumEntries; ++j) {
2293 indices[j] = lclInds(j);
2294 }
2295 }
2296 else if (isGloballyIndexed ()) {
2297 auto gblInds = getGlobalIndsViewHost(rowinfo);
2298 for (size_t j = 0; j < theNumEntries; ++j) {
2299 indices[j] = colMap_->getLocalElement (gblInds(j));
2300 }
2301 }
2302 }
2303 }
2304
2305
2306 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2307 void
2309 getGlobalRowCopy (GlobalOrdinal globalRow,
2310 nonconst_global_inds_host_view_type &indices,
2311 size_t& numEntries) const
2312 {
2313 using Teuchos::ArrayView;
2314 const char tfecfFuncName[] = "getGlobalRowCopy: ";
2315
2316 // This does the right thing (reports an empty row) if the input
2317 // row is invalid.
2318 const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2319 const size_t theNumEntries = rowinfo.numEntries;
2320 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2321 static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2322 "Specified storage (size==" << indices.size () << ") does not suffice "
2323 "to hold all " << theNumEntries << " entry/ies for this row.");
2324 numEntries = theNumEntries; // first side effect
2325
2326 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2327 if (isLocallyIndexed ()) {
2328 auto lclInds = getLocalIndsViewHost(rowinfo);
2329 for (size_t j = 0; j < theNumEntries; ++j) {
2330 indices[j] = colMap_->getGlobalElement (lclInds(j));
2331 }
2332 }
2333 else if (isGloballyIndexed ()) {
2334 auto gblInds = getGlobalIndsViewHost(rowinfo);
2335 for (size_t j = 0; j < theNumEntries; ++j) {
2336 indices[j] = gblInds(j);
2337 }
2338 }
2339 }
2340 }
2341
2342
2343 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2344 void
2347 const LocalOrdinal localRow,
2348 local_inds_host_view_type &indices) const
2349 {
2350 const char tfecfFuncName[] = "getLocalRowView: ";
2351
2352 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2353 (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2354 "currently stored as global indices, so we cannot return a view with "
2355 "local column indices, whether or not the graph has a column Map. If "
2356 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2357
2358 const RowInfo rowInfo = getRowInfo (localRow);
2359 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2360 rowInfo.numEntries > 0) {
2361 indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2362 rowInfo.numEntries,
2363 Access::ReadOnly);
2364 }
2365 else {
2366 // This does the right thing (reports an empty row) if the input
2367 // row is invalid.
2368 indices = local_inds_host_view_type();
2369 }
2370
2371 if (debug_) {
2372 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2373 (static_cast<size_t> (indices.size ()) !=
2374 getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2375 "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2376 localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2377 ". Please report this bug to the Tpetra developers.");
2378 }
2379 }
2380
2381
2382 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2383 void
2386 const GlobalOrdinal globalRow,
2387 global_inds_host_view_type &indices) const
2388 {
2389 const char tfecfFuncName[] = "getGlobalRowView: ";
2390
2391 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2392 (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2393 "currently stored as local indices, so we cannot return a view with "
2394 "global column indices. Use getGlobalRowCopy() instead.");
2395
2396 // This does the right thing (reports an empty row) if the input
2397 // row is invalid.
2398 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2399 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2400 rowInfo.numEntries > 0) {
2401 indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2402 rowInfo.numEntries,
2403 Access::ReadOnly);
2404 }
2405 else {
2406 indices = typename global_inds_dualv_type::t_host::const_type();
2407 }
2408 if (debug_) {
2409 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2410 (static_cast<size_t> (indices.size ()) !=
2411 getNumEntriesInGlobalRow (globalRow),
2412 std::logic_error, "indices.size() = " << indices.extent(0)
2413 << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2414 << getNumEntriesInGlobalRow (globalRow)
2415 << ". Please report this bug to the Tpetra developers.");
2416 }
2417 }
2418
2419
2420 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2421 void
2423 insertLocalIndices (const LocalOrdinal localRow,
2424 const Teuchos::ArrayView<const LocalOrdinal>& indices)
2425 {
2426 const char tfecfFuncName[] = "insertLocalIndices: ";
2427
2428 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2429 (! isFillActive (), std::runtime_error, "Fill must be active.");
2430 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2431 (isGloballyIndexed (), std::runtime_error,
2432 "Graph indices are global; use insertGlobalIndices().");
2433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2434 (! hasColMap (), std::runtime_error,
2435 "Cannot insert local indices without a column Map.");
2436 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2437 (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2438 "Local row index " << localRow << " is not in the row Map "
2439 "on the calling process.");
2440 if (! indicesAreAllocated ()) {
2441 allocateIndices (LocalIndices, verbose_);
2442 }
2443
2444 if (debug_) {
2445 // In debug mode, if the graph has a column Map, test whether any
2446 // of the given column indices are not in the column Map. Keep
2447 // track of the invalid column indices so we can tell the user
2448 // about them.
2449 if (hasColMap ()) {
2450 using Teuchos::Array;
2451 using Teuchos::toString;
2452 using std::endl;
2453 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2454
2455 const map_type& colMap = *colMap_;
2456 Array<LocalOrdinal> badColInds;
2457 bool allInColMap = true;
2458 for (size_type k = 0; k < indices.size (); ++k) {
2459 if (! colMap.isNodeLocalElement (indices[k])) {
2460 allInColMap = false;
2461 badColInds.push_back (indices[k]);
2462 }
2463 }
2464 if (! allInColMap) {
2465 std::ostringstream os;
2466 os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2467 "entries in owned row " << localRow << ", at the following column "
2468 "indices: " << toString (indices) << "." << endl;
2469 os << "Of those, the following indices are not in the column Map on "
2470 "this process: " << toString (badColInds) << "." << endl << "Since "
2471 "the graph has a column Map already, it is invalid to insert entries "
2472 "at those locations.";
2473 TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2474 }
2475 }
2476 }
2477
2478 insertLocalIndicesImpl (localRow, indices);
2479
2480 if (debug_) {
2481 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2482 (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2483 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2484 "! isLocallyIndexed() is true. Please report this bug to the "
2485 "Tpetra developers.");
2486 }
2487 }
2488
2489 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2490 void
2492 insertLocalIndices (const LocalOrdinal localRow,
2493 const LocalOrdinal numEnt,
2494 const LocalOrdinal inds[])
2495 {
2496 Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2497 this->insertLocalIndices (localRow, indsT);
2498 }
2499
2500
2501 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2502 void
2504 insertGlobalIndices (const GlobalOrdinal gblRow,
2505 const LocalOrdinal numInputInds,
2506 const GlobalOrdinal inputGblColInds[])
2507 {
2508 typedef LocalOrdinal LO;
2509 const char tfecfFuncName[] = "insertGlobalIndices: ";
2510
2511 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2512 (this->isLocallyIndexed (), std::runtime_error,
2513 "graph indices are local; use insertLocalIndices().");
2514 // This can't really be satisfied for now, because if we are
2515 // fillComplete(), then we are local. In the future, this may
2516 // change. However, the rule that modification require active
2517 // fill will not change.
2518 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2519 (! this->isFillActive (), std::runtime_error,
2520 "You are not allowed to call this method if fill is not active. "
2521 "If fillComplete has been called, you must first call resumeFill "
2522 "before you may insert indices.");
2523 if (! indicesAreAllocated ()) {
2524 allocateIndices (GlobalIndices, verbose_);
2525 }
2526 const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2527 if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2528 if (debug_) {
2529 if (this->hasColMap ()) {
2530 using std::endl;
2531 const map_type& colMap = * (this->colMap_);
2532 // In a debug build, keep track of the nonowned ("bad") column
2533 // indices, so that we can display them in the exception
2534 // message. In a release build, just ditch the loop early if
2535 // we encounter a nonowned column index.
2536 std::vector<GlobalOrdinal> badColInds;
2537 bool allInColMap = true;
2538 for (LO k = 0; k < numInputInds; ++k) {
2539 if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2540 allInColMap = false;
2541 badColInds.push_back (inputGblColInds[k]);
2542 }
2543 }
2544 if (! allInColMap) {
2545 std::ostringstream os;
2546 os << "You attempted to insert entries in owned row " << gblRow
2547 << ", at the following column indices: [";
2548 for (LO k = 0; k < numInputInds; ++k) {
2549 os << inputGblColInds[k];
2550 if (k + static_cast<LO> (1) < numInputInds) {
2551 os << ",";
2552 }
2553 }
2554 os << "]." << endl << "Of those, the following indices are not in "
2555 "the column Map on this process: [";
2556 for (size_t k = 0; k < badColInds.size (); ++k) {
2557 os << badColInds[k];
2558 if (k + size_t (1) < badColInds.size ()) {
2559 os << ",";
2560 }
2561 }
2562 os << "]." << endl << "Since the matrix has a column Map already, "
2563 "it is invalid to insert entries at those locations.";
2564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2565 (true, std::invalid_argument, os.str ());
2566 }
2567 }
2568 } // debug_
2569 this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2570 }
2571 else { // a nonlocal row
2572 this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2573 numInputInds);
2574 }
2575 }
2576
2577
2578 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2579 void
2581 insertGlobalIndices (const GlobalOrdinal gblRow,
2582 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2583 {
2584 this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2585 inputGblColInds.getRawPtr ());
2586 }
2587
2588
2589 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2590 void
2592 insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2593 const GlobalOrdinal gblColInds[],
2594 const LocalOrdinal numGblColInds)
2595 {
2596 typedef LocalOrdinal LO;
2597 typedef GlobalOrdinal GO;
2598 const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2599
2600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2601 (this->isLocallyIndexed (), std::runtime_error,
2602 "Graph indices are local; use insertLocalIndices().");
2603 // This can't really be satisfied for now, because if we are
2604 // fillComplete(), then we are local. In the future, this may
2605 // change. However, the rule that modification require active
2606 // fill will not change.
2607 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2608 (! this->isFillActive (), std::runtime_error,
2609 "You are not allowed to call this method if fill is not active. "
2610 "If fillComplete has been called, you must first call resumeFill "
2611 "before you may insert indices.");
2612 if (! indicesAreAllocated ()) {
2613 allocateIndices (GlobalIndices, verbose_);
2614 }
2615
2616 Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2617 // If we have a column Map, use it to filter the entries.
2618 if (! colMap_.is_null ()) {
2619 const map_type& colMap = * (this->colMap_);
2620
2621 LO curOffset = 0;
2622 while (curOffset < numGblColInds) {
2623 // Find a sequence of input indices that are in the column Map
2624 // on the calling process. Doing a sequence at a time,
2625 // instead of one at a time, amortizes some overhead.
2626 LO endOffset = curOffset;
2627 for ( ; endOffset < numGblColInds; ++endOffset) {
2628 const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2629 if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2630 break; // first entry, in current sequence, not in the column Map
2631 }
2632 }
2633 // curOffset, endOffset: half-exclusive range of indices in
2634 // the column Map on the calling process. If endOffset ==
2635 // curOffset, the range is empty.
2636 const LO numIndInSeq = (endOffset - curOffset);
2637 if (numIndInSeq != 0) {
2638 this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2639 numIndInSeq);
2640 }
2641 // Invariant before this line: Either endOffset ==
2642 // numGblColInds, or gblColInds[endOffset] is not in the
2643 // column Map on the calling process.
2644 curOffset = endOffset + 1;
2645 }
2646 }
2647 else {
2648 this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2649 gblColInds_av.size ());
2650 }
2651 }
2652
2653 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2654 void
2656 insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2657 const GlobalOrdinal gblColInds[],
2658 const LocalOrdinal numGblColInds)
2659 {
2660 // This creates the std::vector if it doesn't exist yet.
2661 // std::map's operator[] does a lookup each time, so it's better
2662 // to pull nonlocals_[grow] out of the loop.
2663 std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2664 for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2665 // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2666 // order to avoid duplicates. globalAssemble() sorts these
2667 // anyway.
2668 nonlocalRow.push_back (gblColInds[k]);
2669 }
2670 }
2671
2672 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2673 void
2675 removeLocalIndices (LocalOrdinal lrow)
2676 {
2677 const char tfecfFuncName[] = "removeLocalIndices: ";
2678 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2679 ! isFillActive (), std::runtime_error, "requires that fill is active.");
2680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2681 isStorageOptimized (), std::runtime_error,
2682 "cannot remove indices after optimizeStorage() has been called.");
2683 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2684 isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2685 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2686 ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2687 "Local row " << lrow << " is not in the row Map on the calling process.");
2688 if (! indicesAreAllocated ()) {
2689 allocateIndices (LocalIndices, verbose_);
2690 }
2691
2692 if (k_numRowEntries_.extent (0) != 0) {
2693 this->k_numRowEntries_(lrow) = 0;
2694 }
2695
2696 if (debug_) {
2697 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2698 (getNumEntriesInLocalRow (lrow) != 0 ||
2699 ! indicesAreAllocated () ||
2700 ! isLocallyIndexed (), std::logic_error,
2701 "Violated stated post-conditions. Please contact Tpetra team.");
2702 }
2703 }
2704
2705
2706 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2707 void
2709 setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2710 const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2711 {
2712 using ProfilingRegion=Details::ProfilingRegion;
2713 ProfilingRegion region ("Tpetra::CrsGraph::setAllIndices");
2714 const char tfecfFuncName[] = "setAllIndices: ";
2715 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2716 ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2717 "The graph must have a column Map before you may call this method.");
2718 LocalOrdinal numLocalRows = this->getLocalNumRows ();
2719 {
2720 LocalOrdinal rowPtrLen = rowPointers.size();
2721 if(numLocalRows == 0) {
2722 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2723 rowPtrLen != 0 && rowPtrLen != 1,
2724 std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2725 }
2726 else {
2727 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2728 rowPtrLen != numLocalRows + 1,
2729 std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2730 " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2731 }
2732 }
2733
2734 if(debug_) {
2735 using exec_space = typename local_graph_device_type::execution_space;
2736 int columnsOutOfBounds = 0;
2737 local_ordinal_type numLocalCols = this->getLocalNumCols();
2738 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2739 KOKKOS_LAMBDA (const LocalOrdinal i, int& lOutOfBounds)
2740 {
2741 if(columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2742 lOutOfBounds++;
2743 }, columnsOutOfBounds);
2744 int globalColsOutOfBounds= 0;
2745 auto comm = this->getComm();
2746 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2747 Teuchos::outArg (globalColsOutOfBounds));
2748 if (globalColsOutOfBounds)
2749 {
2750 std::string message;
2751 if (columnsOutOfBounds)
2752 {
2753 //Only print message from ranks with the problem
2754 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2755 }
2756 Details::gathervPrint(std::cout, message, *comm);
2757 throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2758 }
2759 }
2760
2761 if (debug_ && this->isSorted()) {
2762 // Verify that the local indices are actually sorted
2763 int notSorted = 0;
2764 using exec_space = typename local_graph_device_type::execution_space;
2765 using size_type = typename local_graph_device_type::size_type;
2766 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2767 KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2768 {
2769 size_type rowBegin = rowPointers(i);
2770 size_type rowEnd = rowPointers(i + 1);
2771 for(size_type j = rowBegin + 1; j < rowEnd; j++)
2772 {
2773 if(columnIndices(j - 1) > columnIndices(j))
2774 {
2775 lNotSorted = 1;
2776 }
2777 }
2778 }, notSorted);
2779 //All-reduce notSorted to avoid rank divergence
2780 int globalNotSorted = 0;
2781 auto comm = this->getComm();
2782 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2783 Teuchos::outArg (globalNotSorted));
2784 if (globalNotSorted)
2785 {
2786 std::string message;
2787 if (notSorted)
2788 {
2789 //Only print message from ranks with the problem
2790 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2791 }
2792 Details::gathervPrint(std::cout, message, *comm);
2793 throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2794 }
2795 }
2796
2797 indicesAreAllocated_ = true;
2798 indicesAreLocal_ = true;
2799 indicesAreSorted_ = true;
2800 noRedundancies_ = true;
2801 lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
2803 setRowPtrs(rowPointers);
2804
2805 set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2806
2807 // Storage MUST be packed, since the interface doesn't give any
2808 // way to indicate any extra space at the end of each row.
2809 storageStatus_ = Details::STORAGE_1D_PACKED;
2810
2811 // These normally get cleared out at the end of allocateIndices.
2812 // It makes sense to clear them out here, because at the end of
2813 // this method, the graph is allocated on the calling process.
2815 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2816
2818 }
2819
2820
2821 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2822 void
2824 setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2825 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2826 {
2827 using Kokkos::View;
2828 typedef typename local_graph_device_type::row_map_type row_map_type;
2829 typedef typename row_map_type::array_layout layout_type;
2830 typedef typename row_map_type::non_const_value_type row_offset_type;
2831 typedef View<size_t*, layout_type , Kokkos::HostSpace,
2832 Kokkos::MemoryUnmanaged> input_view_type;
2833 typedef typename row_map_type::non_const_type nc_row_map_type;
2834
2835 const size_t size = static_cast<size_t> (rowPointers.size ());
2836 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2837 input_view_type ptr_in (rowPointers.getRawPtr (), size);
2838
2839 nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
2840
2841 // FIXME get rid of the else-clause when the minimum CXX standard required is bumped to C++17
2842#ifdef KOKKOS_ENABLE_CXX17
2843 if constexpr (same) { // size_t == row_offset_type
2844 using lexecution_space = typename device_type::execution_space;
2845 Kokkos::deep_copy (lexecution_space(),
2846 ptr_rot,
2847 ptr_in);
2848 }
2849#else
2850 if (same) { // size_t == row_offset_type
2851 // This compile-time logic ensures that the compiler never sees
2852 // an assignment of View<row_offset_type*, ...> to View<size_t*,
2853 // ...> unless size_t == row_offset_type.
2854 input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
2855 // DEEP_COPY REVIEW - HOST-TO-DEVICE
2856 Kokkos::deep_copy (execution_space(),
2857 Kokkos::Impl::if_c<same,
2858 nc_row_map_type,
2859 input_view_type>::select (ptr_rot, ptr_decoy),
2860 ptr_in);
2861 }
2862#endif
2863 else { // size_t != row_offset_type
2864 // CudaUvmSpace != HostSpace, so this will be false in that case.
2865 constexpr bool inHostMemory =
2866 std::is_same<typename row_map_type::memory_space,
2867 Kokkos::HostSpace>::value;
2868 if (inHostMemory) {
2869 // Copy (with cast from size_t to row_offset_type, with bounds
2870 // checking if necessary) to ptr_rot.
2871 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
2872 }
2873 else { // Copy input row offsets to device first.
2874 //
2875 // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2876 // execution space would avoid the double copy.
2877 //
2878 View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
2879
2880 // DEEP_COPY REVIEW - NOT TESTED
2881 Kokkos::deep_copy (ptr_st, ptr_in);
2882 // Copy on device (casting from size_t to row_offset_type,
2883 // with bounds checking if necessary) to ptr_rot. This
2884 // executes in the output View's execution space, which is the
2885 // same as execution_space.
2886 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
2887 }
2888 }
2889
2890 Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2891 Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
2892 setAllIndices (ptr_rot, k_ind);
2893 }
2894
2895
2896 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2897 void
2900 {
2901 using Teuchos::Comm;
2902 using Teuchos::outArg;
2903 using Teuchos::RCP;
2904 using Teuchos::rcp;
2905 using Teuchos::REDUCE_MAX;
2906 using Teuchos::REDUCE_MIN;
2907 using Teuchos::reduceAll;
2908 using std::endl;
2909 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2910 using LO = local_ordinal_type;
2911 using GO = global_ordinal_type;
2912 using size_type = typename Teuchos::Array<GO>::size_type;
2913 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2914
2915 std::unique_ptr<std::string> prefix;
2916 if (verbose_) {
2917 prefix = this->createPrefix("CrsGraph", "globalAssemble");
2918 std::ostringstream os;
2919 os << *prefix << "Start" << endl;
2920 std::cerr << os.str();
2921 }
2922 RCP<const Comm<int> > comm = getComm ();
2923
2924 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2925 (! isFillActive (), std::runtime_error, "Fill must be active before "
2926 "you may call this method.");
2927
2928 const size_t myNumNonlocalRows = this->nonlocals_.size ();
2929
2930 // If no processes have nonlocal rows, then we don't have to do
2931 // anything. Checking this is probably cheaper than constructing
2932 // the Map of nonlocal rows (see below) and noticing that it has
2933 // zero global entries.
2934 {
2935 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2936 int someoneHasNonlocalRows = 0;
2937 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
2938 outArg (someoneHasNonlocalRows));
2939 if (someoneHasNonlocalRows == 0) {
2940 if (verbose_) {
2941 std::ostringstream os;
2942 os << *prefix << "Done: No nonlocal rows" << endl;
2943 std::cerr << os.str();
2944 }
2945 return;
2946 }
2947 else if (verbose_) {
2948 std::ostringstream os;
2949 os << *prefix << "At least 1 process has nonlocal rows"
2950 << endl;
2951 std::cerr << os.str();
2952 }
2953 }
2954
2955 // 1. Create a list of the "nonlocal" rows on each process. this
2956 // requires iterating over nonlocals_, so while we do this,
2957 // deduplicate the entries and get a count for each nonlocal
2958 // row on this process.
2959 // 2. Construct a new row Map corresponding to those rows. This
2960 // Map is likely overlapping. We know that the Map is not
2961 // empty on all processes, because the above all-reduce and
2962 // return exclude that case.
2963
2964 RCP<const map_type> nonlocalRowMap;
2965 // Keep this for CrsGraph's constructor.
2966 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
2967 {
2968 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
2969 size_type curPos = 0;
2970 for (auto mapIter = this->nonlocals_.begin ();
2971 mapIter != this->nonlocals_.end ();
2972 ++mapIter, ++curPos) {
2973 myNonlocalGblRows[curPos] = mapIter->first;
2974 std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2975 std::sort (gblCols.begin (), gblCols.end ());
2976 auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
2977 gblCols.erase (vecLast, gblCols.end ());
2978 numEntPerNonlocalRow[curPos] = gblCols.size ();
2979 }
2980
2981 // Currently, Map requires that its indexBase be the global min
2982 // of all its global indices. Map won't compute this for us, so
2983 // we must do it. If our process has no nonlocal rows, set the
2984 // "min" to the max possible GO value. This ensures that if
2985 // some process has at least one nonlocal row, then it will pick
2986 // that up as the min. We know that at least one process has a
2987 // nonlocal row, since the all-reduce and return at the top of
2988 // this method excluded that case.
2989 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
2990 {
2991 auto iter = std::min_element (myNonlocalGblRows.begin (),
2992 myNonlocalGblRows.end ());
2993 if (iter != myNonlocalGblRows.end ()) {
2994 myMinNonlocalGblRow = *iter;
2995 }
2996 }
2997 GO gblMinNonlocalGblRow = 0;
2998 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
2999 outArg (gblMinNonlocalGblRow));
3000 const GO indexBase = gblMinNonlocalGblRow;
3001 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3002 nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3003 }
3004
3005 if (verbose_) {
3006 std::ostringstream os;
3007 os << *prefix << "nonlocalRowMap->getIndexBase()="
3008 << nonlocalRowMap->getIndexBase() << endl;
3009 std::cerr << os.str();
3010 }
3011
3012 // 3. Use the column indices for each nonlocal row, as stored in
3013 // nonlocals_, to construct a CrsGraph corresponding to
3014 // nonlocal rows. We need, but we have, exact counts of the
3015 // number of entries in each nonlocal row.
3016
3017 RCP<crs_graph_type> nonlocalGraph =
3018 rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
3019 {
3020 size_type curPos = 0;
3021 for (auto mapIter = this->nonlocals_.begin ();
3022 mapIter != this->nonlocals_.end ();
3023 ++mapIter, ++curPos) {
3024 const GO gblRow = mapIter->first;
3025 std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3026 const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3027 nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3028 }
3029 }
3030 if (verbose_) {
3031 std::ostringstream os;
3032 os << *prefix << "Built nonlocal graph" << endl;
3033 std::cerr << os.str();
3034 }
3035 // There's no need to fill-complete the nonlocals graph.
3036 // We just use it as a temporary container for the Export.
3037
3038 // 4. If the original row Map is one to one, then we can Export
3039 // directly from nonlocalGraph into this. Otherwise, we have
3040 // to create a temporary graph with a one-to-one row Map,
3041 // Export into that, then Import from the temporary graph into
3042 // *this.
3043
3044 auto origRowMap = this->getRowMap ();
3045 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3046
3047 if (origRowMapIsOneToOne) {
3048 if (verbose_) {
3049 std::ostringstream os;
3050 os << *prefix << "Original row Map is 1-to-1" << endl;
3051 std::cerr << os.str();
3052 }
3053 export_type exportToOrig (nonlocalRowMap, origRowMap);
3054 this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3055 // We're done at this point!
3056 }
3057 else {
3058 if (verbose_) {
3059 std::ostringstream os;
3060 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3061 std::cerr << os.str();
3062 }
3063 // If you ask a Map whether it is one to one, it does some
3064 // communication and stashes intermediate results for later use
3065 // by createOneToOne. Thus, calling createOneToOne doesn't cost
3066 // much more then the original cost of calling isOneToOne.
3067 auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3068 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3069
3070 // Create a temporary graph with the one-to-one row Map.
3071 //
3072 // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3073 // row, to avoid reallocation during the Export operation.
3074 crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3075
3076 // Export from graph of nonlocals into the temp one-to-one graph.
3077 if (verbose_) {
3078 std::ostringstream os;
3079 os << *prefix << "Export nonlocal graph" << endl;
3080 std::cerr << os.str();
3081 }
3082 oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3083
3084 // We don't need the graph of nonlocals anymore, so get rid of
3085 // it, to keep the memory high-water mark down.
3086 nonlocalGraph = Teuchos::null;
3087
3088 // Import from the one-to-one graph to the original graph.
3089 import_type importToOrig (oneToOneRowMap, origRowMap);
3090 if (verbose_) {
3091 std::ostringstream os;
3092 os << *prefix << "Import nonlocal graph" << endl;
3093 std::cerr << os.str();
3094 }
3095 this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3096 }
3097
3098 // It's safe now to clear out nonlocals_, since we've already
3099 // committed side effects to *this. The standard idiom for
3100 // clearing a Container like std::map, is to swap it with an empty
3101 // Container and let the swapped Container fall out of scope.
3102 decltype (this->nonlocals_) newNonlocals;
3103 std::swap (this->nonlocals_, newNonlocals);
3104
3106 if (verbose_) {
3107 std::ostringstream os;
3108 os << *prefix << "Done" << endl;
3109 std::cerr << os.str();
3110 }
3111 }
3112
3113
3114 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3115 void
3117 resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3118 {
3119 clearGlobalConstants();
3120 if (params != Teuchos::null) this->setParameterList (params);
3121 // either still sorted/merged or initially sorted/merged
3122 indicesAreSorted_ = true;
3123 noRedundancies_ = true;
3124 fillComplete_ = false;
3125 }
3126
3127
3128 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3129 void
3131 fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3132 {
3133 // If the graph already has domain and range Maps, don't clobber
3134 // them. If it doesn't, use the current row Map for both the
3135 // domain and range Maps.
3136 //
3137 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3138 // column Map, and column indices are inserted which are not in
3139 // the row Map on any process, this will cause troubles. However,
3140 // that is not a common case for most applications that we
3141 // encounter, and checking for it might require more
3142 // communication.
3143 Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3144 if (domMap.is_null ()) {
3145 domMap = this->getRowMap ();
3146 }
3147 Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3148 if (ranMap.is_null ()) {
3149 ranMap = this->getRowMap ();
3150 }
3151 this->fillComplete (domMap, ranMap, params);
3152 }
3153
3154
3155 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3156 void
3158 fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3159 const Teuchos::RCP<const map_type>& rangeMap,
3160 const Teuchos::RCP<Teuchos::ParameterList>& params)
3161 {
3162 using std::endl;
3163 const char tfecfFuncName[] = "fillComplete: ";
3164 const bool verbose = verbose_;
3165
3166 std::unique_ptr<std::string> prefix;
3167 if (verbose) {
3168 prefix = this->createPrefix("CrsGraph", "fillComplete");
3169 std::ostringstream os;
3170 os << *prefix << "Start" << endl;
3171 std::cerr << os.str();
3172 }
3173
3174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3175 (! isFillActive () || isFillComplete (), std::runtime_error,
3176 "Graph fill state must be active (isFillActive() "
3177 "must be true) before calling fillComplete().");
3178
3179 const int numProcs = getComm ()->getSize ();
3180
3181 //
3182 // Read and set parameters
3183 //
3184
3185 // Does the caller want to sort remote GIDs (within those owned by
3186 // the same process) in makeColMap()?
3187 if (! params.is_null ()) {
3188 if (params->isParameter ("sort column map ghost gids")) {
3190 params->get<bool> ("sort column map ghost gids",
3192 }
3193 else if (params->isParameter ("Sort column Map ghost GIDs")) {
3195 params->get<bool> ("Sort column Map ghost GIDs",
3197 }
3198 }
3199
3200 // If true, the caller promises that no process did nonlocal
3201 // changes since the last call to fillComplete.
3202 bool assertNoNonlocalInserts = false;
3203 if (! params.is_null ()) {
3204 assertNoNonlocalInserts =
3205 params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3206 }
3207
3208 //
3209 // Allocate indices, if they haven't already been allocated
3210 //
3211 if (! indicesAreAllocated ()) {
3212 if (hasColMap ()) {
3213 // We have a column Map, so use local indices.
3214 allocateIndices (LocalIndices, verbose);
3215 } else {
3216 // We don't have a column Map, so use global indices.
3217 allocateIndices (GlobalIndices, verbose);
3218 }
3219 }
3220
3221 //
3222 // Do global assembly, if requested and if the communicator
3223 // contains more than one process.
3224 //
3225 const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3226 if (mayNeedGlobalAssemble) {
3227 // This first checks if we need to do global assembly.
3228 // The check costs a single all-reduce.
3229 globalAssemble ();
3230 }
3231 else {
3232 const size_t numNonlocals = nonlocals_.size();
3233 if (verbose) {
3234 std::ostringstream os;
3235 os << *prefix << "Do not need to call globalAssemble; "
3236 "assertNoNonlocalInserts="
3237 << (assertNoNonlocalInserts ? "true" : "false")
3238 << "numProcs=" << numProcs
3239 << ", nonlocals_.size()=" << numNonlocals << endl;
3240 std::cerr << os.str();
3241 }
3242 const int lclNeededGlobalAssemble =
3243 (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3244 if (lclNeededGlobalAssemble != 0 && verbose) {
3245 std::ostringstream os;
3246 os << *prefix;
3247 Details::Impl::verbosePrintMap(
3248 os, nonlocals_.begin(), nonlocals_.end(),
3249 nonlocals_.size(), "nonlocals_");
3250 std::cerr << os.str() << endl;
3251 }
3252
3253 if (debug_) {
3254 auto map = this->getMap();
3255 auto comm = map.is_null() ? Teuchos::null : map->getComm();
3256 int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3257 if (! comm.is_null()) {
3258 using Teuchos::REDUCE_MAX;
3259 using Teuchos::reduceAll;
3260 reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3261 Teuchos::outArg(gblNeededGlobalAssemble));
3262 }
3263 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3264 (gblNeededGlobalAssemble != 0, std::runtime_error,
3265 "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3266 "least one process in the CrsGraph's communicator. This "
3267 "means either that you incorrectly set the "
3268 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3269 "or that you inserted invalid entries. "
3270 "Rerun with the environment variable TPETRA_VERBOSE="
3271 "CrsGraph set to see the entries of nonlocals_ on every "
3272 "MPI process (WARNING: lots of output).");
3273 }
3274 else {
3275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3276 (lclNeededGlobalAssemble != 0, std::runtime_error,
3277 "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3278 "calling process. This means either that you incorrectly "
3279 "set the \"No Nonlocal Changes\" fillComplete parameter "
3280 "to true, or that you inserted invalid entries. "
3281 "Rerun with the environment "
3282 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3283 "of nonlocals_ on every MPI process (WARNING: lots of "
3284 "output).");
3285 }
3286 }
3287
3288 // Set domain and range Map. This may clear the Import / Export
3289 // objects if the new Maps differ from any old ones.
3290 setDomainRangeMaps (domainMap, rangeMap);
3291
3292 // If the graph does not already have a column Map (either from
3293 // the user constructor calling the version of the constructor
3294 // that takes a column Map, or from a previous fillComplete call),
3295 // then create it.
3296 Teuchos::Array<int> remotePIDs (0);
3297 const bool mustBuildColMap = ! this->hasColMap ();
3298 if (mustBuildColMap) {
3299 this->makeColMap (remotePIDs); // resized on output
3300 }
3301
3302 // Make indices local, if they aren't already.
3303 // The method doesn't do any work if the indices are already local.
3304 const std::pair<size_t, std::string> makeIndicesLocalResult =
3305 this->makeIndicesLocal(verbose);
3306
3307 if (debug_) {
3309 using Teuchos::RCP;
3310 using Teuchos::REDUCE_MIN;
3311 using Teuchos::reduceAll;
3312 using Teuchos::outArg;
3313
3314 RCP<const map_type> map = this->getMap ();
3315 RCP<const Teuchos::Comm<int> > comm;
3316 if (! map.is_null ()) {
3317 comm = map->getComm ();
3318 }
3319 if (comm.is_null ()) {
3320 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3321 (makeIndicesLocalResult.first != 0, std::runtime_error,
3322 makeIndicesLocalResult.second);
3323 }
3324 else {
3325 const int lclSuccess = (makeIndicesLocalResult.first == 0);
3326 int gblSuccess = 0; // output argument
3327 reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3328 if (gblSuccess != 1) {
3329 std::ostringstream os;
3330 gathervPrint (os, makeIndicesLocalResult.second, *comm);
3331 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3332 (true, std::runtime_error, os.str ());
3333 }
3334 }
3335 }
3336 else {
3337 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3338 // the error state to makeImportExport or
3339 // computeGlobalConstants, which may do all-reduces and thus may
3340 // have the opportunity to communicate that error state.
3341 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3342 (makeIndicesLocalResult.first != 0, std::runtime_error,
3343 makeIndicesLocalResult.second);
3344 }
3345
3346 // If this process has no indices, then CrsGraph considers it
3347 // already trivially sorted and merged. Thus, this method need
3348 // not be called on all processes in the row Map's communicator.
3349 this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3350
3351 // Make Import and Export objects, if they haven't been made
3352 // already. If we made a column Map above, reuse information from
3353 // that process to avoid communiation in the Import setup.
3354 this->makeImportExport (remotePIDs, mustBuildColMap);
3355
3356 // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3357 this->fillLocalGraph (params);
3358
3359 const bool callComputeGlobalConstants = params.get () == nullptr ||
3360 params->get ("compute global constants", true);
3361 if (callComputeGlobalConstants) {
3362 this->computeGlobalConstants ();
3363 }
3364 else {
3365 this->computeLocalConstants ();
3366 }
3367 this->fillComplete_ = true;
3368 this->checkInternalState ();
3369
3370 if (verbose) {
3371 std::ostringstream os;
3372 os << *prefix << "Done" << endl;
3373 std::cerr << os.str();
3374 }
3375 }
3376
3377
3378 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3379 void
3381 expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3382 const Teuchos::RCP<const map_type>& rangeMap,
3383 const Teuchos::RCP<const import_type>& importer,
3384 const Teuchos::RCP<const export_type>& exporter,
3385 const Teuchos::RCP<Teuchos::ParameterList>& params)
3386 {
3387 const char tfecfFuncName[] = "expertStaticFillComplete: ";
3388#ifdef HAVE_TPETRA_MMM_TIMINGS
3389 std::string label;
3390 if(!params.is_null())
3391 label = params->get("Timer Label",label);
3392 std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3393 using Teuchos::TimeMonitor;
3394 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3395#endif
3396
3397
3398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3399 domainMap.is_null () || rangeMap.is_null (),
3400 std::runtime_error, "The input domain Map and range Map must be nonnull.");
3401 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3402 isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3403 "call this method unless the graph has a column Map.");
3404 auto rowPtrsUnpackedLength = this->getRowPtrsUnpackedHost().extent (0);
3405 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3406 getLocalNumRows () > 0 && rowPtrsUnpackedLength == 0,
3407 std::runtime_error, "The calling process has getLocalNumRows() = "
3408 << getLocalNumRows () << " > 0 rows, but the row offsets array has not "
3409 "been set.");
3410 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3411 static_cast<size_t> (rowPtrsUnpackedLength) != getLocalNumRows () + 1,
3412 std::runtime_error, "The row offsets array has length " <<
3413 rowPtrsUnpackedLength << " != getLocalNumRows()+1 = " <<
3414 (getLocalNumRows () + 1) << ".");
3415
3416 // Note: We don't need to do the following things which are normally done in fillComplete:
3417 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3418
3419 // Constants from allocateIndices
3420 //
3421 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3422 // away once the graph is allocated. expertStaticFillComplete
3423 // either presumes that the graph is allocated, or "allocates" it.
3424 //
3425 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3426 // version of CrsGraph is to allocate in the constructor, not
3427 // lazily on first insert. That will make both
3428 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3430 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3431 indicesAreAllocated_ = true;
3432
3433 // Constants from makeIndicesLocal
3434 //
3435 // The graph has a column Map, so its indices had better be local.
3436 indicesAreLocal_ = true;
3437 indicesAreGlobal_ = false;
3438
3439 // set domain/range map: may clear the import/export objects
3440#ifdef HAVE_TPETRA_MMM_TIMINGS
3441 MM = Teuchos::null;
3442 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3443#endif
3444 setDomainRangeMaps (domainMap, rangeMap);
3445
3446 // Presume the user sorted and merged the arrays first
3447 indicesAreSorted_ = true;
3448 noRedundancies_ = true;
3449
3450 // makeImportExport won't create a new importer/exporter if I set one here first.
3451#ifdef HAVE_TPETRA_MMM_TIMINGS
3452 MM = Teuchos::null;
3453 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3454#endif
3455
3456 importer_ = Teuchos::null;
3457 exporter_ = Teuchos::null;
3458 if (importer != Teuchos::null) {
3459 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3460 ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3461 ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3462 std::invalid_argument,": importer does not match matrix maps.");
3463 importer_ = importer;
3464
3465 }
3466
3467#ifdef HAVE_TPETRA_MMM_TIMINGS
3468 MM = Teuchos::null;
3469 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3470#endif
3471
3472 if (exporter != Teuchos::null) {
3473 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3474 ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3475 ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3476 std::invalid_argument,": exporter does not match matrix maps.");
3477 exporter_ = exporter;
3478 }
3479
3480#ifdef HAVE_TPETRA_MMM_TIMINGS
3481 MM = Teuchos::null;
3482 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3483#endif
3484 Teuchos::Array<int> remotePIDs (0); // unused output argument
3485 this->makeImportExport (remotePIDs, false);
3486
3487#ifdef HAVE_TPETRA_MMM_TIMINGS
3488 MM = Teuchos::null;
3489 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3490#endif
3491 this->fillLocalGraph (params);
3492
3493 const bool callComputeGlobalConstants = params.get () == nullptr ||
3494 params->get ("compute global constants", true);
3495
3496 if (callComputeGlobalConstants) {
3497#ifdef HAVE_TPETRA_MMM_TIMINGS
3498 MM = Teuchos::null;
3499 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3500#endif // HAVE_TPETRA_MMM_TIMINGS
3501 this->computeGlobalConstants ();
3502 }
3503 else {
3504#ifdef HAVE_TPETRA_MMM_TIMINGS
3505 MM = Teuchos::null;
3506 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3507#endif // HAVE_TPETRA_MMM_TIMINGS
3508 this->computeLocalConstants ();
3509 }
3510
3511 fillComplete_ = true;
3512
3513#ifdef HAVE_TPETRA_MMM_TIMINGS
3514 MM = Teuchos::null;
3515 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3516#endif
3518 }
3519
3520
3521 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3522 void
3524 fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3525 {
3527 typedef typename local_graph_device_type::row_map_type row_map_type;
3528 typedef typename row_map_type::non_const_type non_const_row_map_type;
3529 typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3530 const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3531 "expertStaticFillComplete): ";
3532 const size_t lclNumRows = this->getLocalNumRows ();
3533
3534 // This method's goal is to fill in the two arrays (compressed
3535 // sparse row format) that define the sparse graph's structure.
3536
3537 bool requestOptimizedStorage = true;
3538 if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3539 requestOptimizedStorage = false;
3540 }
3541
3542 // The graph's column indices are currently stored in a 1-D
3543 // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3544 // in k_lclInds1D_.
3545
3546 if (debug_) {
3547 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3548 // The graph's array of row offsets must already be allocated.
3549 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3550 (rowPtrsUnpacked.extent (0) == 0, std::logic_error,
3551 "rowPtrsUnpacked_host_ has size zero, but shouldn't");
3552 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3553 (rowPtrsUnpacked.extent (0) != lclNumRows + 1, std::logic_error,
3554 "rowPtrsUnpacked_host_.extent(0) = "
3555 << rowPtrsUnpacked.extent (0) << " != (lclNumRows + 1) = "
3556 << (lclNumRows + 1) << ".");
3557 const size_t numOffsets = rowPtrsUnpacked.extent (0);
3558 const auto valToCheck = rowPtrsUnpacked(numOffsets-1);
3559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3560 (numOffsets != 0 &&
3561 lclIndsUnpacked_wdv.extent (0) != valToCheck,
3562 std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3563 " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3564 << " != rowPtrsUnpacked_host_(" << numOffsets << ")=" << valToCheck
3565 << ".");
3566 }
3567
3568 size_t allocSize = 0;
3569 try {
3570 allocSize = this->getLocalAllocationSize ();
3571 }
3572 catch (std::logic_error& e) {
3573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3574 (true, std::logic_error, "getLocalAllocationSize threw "
3575 "std::logic_error: " << e.what ());
3576 }
3577 catch (std::runtime_error& e) {
3578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3579 (true, std::runtime_error, "getLocalAllocationSize threw "
3580 "std::runtime_error: " << e.what ());
3581 }
3582 catch (std::exception& e) {
3583 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3584 (true, std::runtime_error, "getLocalAllocationSize threw "
3585 "std::exception: " << e.what ());
3586 }
3587 catch (...) {
3588 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3589 (true, std::runtime_error, "getLocalAllocationSize threw "
3590 "an exception not a subclass of std::exception.");
3591 }
3592
3593 if (this->getLocalNumEntries () != allocSize) {
3594 // Use the nonconst version of row_map_type for ptr_d, because
3595 // the latter is const and we need to modify ptr_d here.
3596 non_const_row_map_type ptr_d;
3597 row_map_type ptr_d_const;
3598
3599 // The graph's current 1-D storage is "unpacked." This means
3600 // the row offsets may differ from what the final row offsets
3601 // should be. This could happen, for example, if the user set
3602 // an upper bound on the number of entries in each row, but
3603 // didn't fill all those entries.
3604
3605 if (debug_) {
3606 auto rowPtrsUnpacked = this->getRowPtrsUnpackedHost();
3607 if (rowPtrsUnpacked.extent (0) != 0) {
3608 const size_t numOffsets =
3609 static_cast<size_t> (rowPtrsUnpacked.extent (0));
3610 const auto valToCheck = rowPtrsUnpacked(numOffsets - 1);
3611 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3612 (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3613 std::logic_error, "(Unpacked branch) Before allocating "
3614 "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3615 << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3616 << lclIndsUnpacked_wdv.extent (0) << ".");
3617 }
3618 }
3619
3620 // Pack the row offsets into ptr_d, by doing a sum-scan of the
3621 // array of valid entry counts per row (k_numRowEntries_).
3622
3623 // Total number of entries in the matrix on the calling
3624 // process. We will compute this in the loop below. It's
3625 // cheap to compute and useful as a sanity check.
3626 size_t lclTotalNumEntries = 0;
3627 {
3628 // Allocate the packed row offsets array.
3629 ptr_d =
3630 non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3631 ptr_d_const = ptr_d;
3632
3633 // It's ok that k_numRowEntries_ is a host View; the
3634 // function can handle this.
3635 typename num_row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3636 if (debug_) {
3637 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3638 (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3639 std::logic_error, "(Unpacked branch) "
3640 "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3641 << " != getLocalNumRows()=" << lclNumRows << "");
3642 }
3643
3644 lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3645
3646 if (debug_) {
3647 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3648 (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3649 std::logic_error, "(Unpacked branch) After allocating "
3650 "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3651 << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3652 const auto valToCheck =
3653 ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3655 (valToCheck != lclTotalNumEntries, std::logic_error,
3656 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3657 "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3658 << ") = " << valToCheck << " != total number of entries "
3659 "on the calling process = " << lclTotalNumEntries
3660 << ".");
3661 }
3662 }
3663
3664 // Allocate the array of packed column indices.
3665 lclinds_1d_type ind_d =
3666 lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3667
3668 // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3669 // them, using the packed row offsets array ptr_d that we
3670 // created above.
3671 //
3672 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3673 // CrsMatrix?), we need to keep around the unpacked row
3674 // offsets and column indices.
3675
3676 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3677 // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3678 typedef pack_functor<
3679 typename local_graph_device_type::entries_type::non_const_type,
3680 typename local_inds_dualv_type::t_dev::const_type,
3681 row_map_type,
3682 typename local_graph_device_type::row_map_type> inds_packer_type;
3683 inds_packer_type f (ind_d,
3684 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3685 ptr_d, this->getRowPtrsUnpackedDevice());
3686 {
3687 typedef typename decltype (ind_d)::execution_space exec_space;
3688 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3689 Kokkos::parallel_for (range_type (0, lclNumRows), f);
3690 }
3691
3692 if (debug_) {
3693 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3694 (ptr_d.extent (0) == 0, std::logic_error,
3695 "(\"Optimize Storage\"=true branch) After packing, "
3696 "ptr_d.extent(0)=0.");
3697 if (ptr_d.extent (0) != 0) {
3698 const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3699 const auto valToCheck =
3700 ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3701 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3702 (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3703 std::logic_error, "(\"Optimize Storage\"=true branch) "
3704 "After packing, ptr_d(" << (numOffsets-1) << ")="
3705 << valToCheck << " != ind_d.extent(0)="
3706 << ind_d.extent(0) << ".");
3707 }
3708 }
3709 // Build the local graph.
3710 if (requestOptimizedStorage)
3711 setRowPtrs(ptr_d_const);
3712 else
3713 setRowPtrsPacked(ptr_d_const);
3714 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3715 }
3716 else { // We don't have to pack, so just set the pointers.
3717 //Set both packed and unpacked rowptrs to this
3718 this->setRowPtrs(rowPtrsUnpacked_dev_);
3719 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3720
3721 if (debug_) {
3722 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3723 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3724 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3725 (rowPtrsPacked_dev.extent (0) == 0, std::logic_error,
3726 "(\"Optimize Storage\"=false branch) "
3727 "rowPtrsPacked_dev_.extent(0) = 0.");
3728 if (rowPtrsPacked_dev.extent (0) != 0) {
3729 const size_t numOffsets =
3730 static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3731 const size_t valToCheck =
3732 rowPtrsPacked_host(numOffsets - 1);
3733 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3734 (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
3735 std::logic_error, "(\"Optimize Storage\"=false branch) "
3736 "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
3737 << valToCheck
3738 << " != lclIndsPacked_wdv.extent(0)="
3739 << lclIndsPacked_wdv.extent (0) << ".");
3740 }
3741 }
3742 }
3743
3744 if (debug_) {
3745 auto rowPtrsPacked_dev = this->getRowPtrsPackedDevice();
3746 auto rowPtrsPacked_host = this->getRowPtrsPackedHost();
3747 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3748 (static_cast<size_t> (rowPtrsPacked_dev.extent (0)) != lclNumRows + 1,
3749 std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
3750 rowPtrsPacked_dev.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3751 << ".");
3752 if (rowPtrsPacked_dev.extent (0) != 0) {
3753 const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev.extent (0));
3754 const auto valToCheck = rowPtrsPacked_host(numOffsets - 1);
3755 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3756 (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3757 std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3758 << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
3759 << lclIndsPacked_wdv.extent (0) << ".");
3760 }
3761 }
3762
3763 if (requestOptimizedStorage) {
3764 // With optimized storage, we don't need to store
3765 // the array of row entry counts.
3766
3767 // Free graph data structures that are only needed for
3768 // unpacked 1-D storage.
3769 k_numRowEntries_ = num_row_entries_type ();
3770
3771 // Keep the new 1-D packed allocations.
3772 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3773
3774 storageStatus_ = Details::STORAGE_1D_PACKED;
3775 }
3776
3777 set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3778 }
3779
3780 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3781 void
3783 replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3784 {
3785 // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3786 //
3787 // FIXME (mfh 18 Aug 2014) This will break if the calling process
3788 // has no entries, because in that case, currently it is neither
3789 // locally nor globally indexed. This will change once we get rid
3790 // of lazy allocation (so that the constructor allocates indices
3791 // and therefore commits to local vs. global).
3792 const char tfecfFuncName[] = "replaceColMap: ";
3793 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3794 isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3795 "Requires matching maps and non-static graph.");
3796 colMap_ = newColMap;
3797 }
3798
3799 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3800 void
3802 reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
3803 const Teuchos::RCP<const import_type>& newImport,
3804 const bool sortIndicesInEachRow)
3805 {
3806 using Teuchos::REDUCE_MIN;
3807 using Teuchos::reduceAll;
3808 using Teuchos::RCP;
3809 typedef GlobalOrdinal GO;
3810 typedef LocalOrdinal LO;
3811 typedef typename local_inds_dualv_type::t_host col_inds_type;
3812 const char tfecfFuncName[] = "reindexColumns: ";
3813
3814 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3815 isFillComplete (), std::runtime_error, "The graph is fill complete "
3816 "(isFillComplete() returns true). You must call resumeFill() before "
3817 "you may call this method.");
3818
3819 // mfh 19 Aug 2014: This method does NOT redistribute data; it
3820 // doesn't claim to do the work of an Import or Export. This
3821 // means that for all processes, the calling process MUST own all
3822 // column indices, in both the old column Map (if it exists) and
3823 // the new column Map. We check this via an all-reduce.
3824 //
3825 // Some processes may be globally indexed, others may be locally
3826 // indexed, and others (that have no graph entries) may be
3827 // neither. This method will NOT change the graph's current
3828 // state. If it's locally indexed, it will stay that way, and
3829 // vice versa. It would easy to add an option to convert indices
3830 // from global to local, so as to save a global-to-local
3831 // conversion pass. However, we don't do this here. The intended
3832 // typical use case is that the graph already has a column Map and
3833 // is locally indexed, and this is the case for which we optimize.
3834
3835 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
3836
3837 // Attempt to convert indices to the new column Map's version of
3838 // local. This will fail if on the calling process, the graph has
3839 // indices that are not on that process in the new column Map.
3840 // After the local conversion attempt, we will do an all-reduce to
3841 // see if any processes failed.
3842
3843 // If this is false, then either the graph contains a column index
3844 // which is invalid in the CURRENT column Map, or the graph is
3845 // locally indexed but currently has no column Map. In either
3846 // case, there is no way to convert the current local indices into
3847 // global indices, so that we can convert them into the new column
3848 // Map's local indices. It's possible for this to be true on some
3849 // processes but not others, due to replaceColMap.
3850 bool allCurColIndsValid = true;
3851 // On the calling process, are all valid current column indices
3852 // also in the new column Map on the calling process? In other
3853 // words, does local reindexing suffice, or should the user have
3854 // done an Import or Export instead?
3855 bool localSuffices = true;
3856
3857 // Final arrays for the local indices. We will allocate exactly
3858 // one of these ONLY if the graph is locally indexed on the
3859 // calling process, and ONLY if the graph has one or more entries
3860 // (is not empty) on the calling process. In that case, we
3861 // allocate the first (1-D storage) if the graph has a static
3862 // profile, else we allocate the second (2-D storage).
3863 col_inds_type newLclInds1D;
3864 auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3865
3866 // If indices aren't allocated, that means the calling process
3867 // owns no entries in the graph. Thus, there is nothing to
3868 // convert, and it trivially succeeds locally.
3869 if (indicesAreAllocated ()) {
3870 if (isLocallyIndexed ()) {
3871 if (hasColMap ()) { // locally indexed, and currently has a column Map
3872 const map_type& oldColMap = * (getColMap ());
3873 // Allocate storage for the new local indices.
3874 const size_t allocSize = this->getLocalAllocationSize ();
3875 newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
3876 allocSize);
3877 // Attempt to convert the new indices locally.
3878 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3879 const RowInfo rowInfo = this->getRowInfo (lclRow);
3880 const size_t beg = rowInfo.offset1D;
3881 const size_t end = beg + rowInfo.numEntries;
3882 for (size_t k = beg; k < end; ++k) {
3883 const LO oldLclCol = oldLclInds1D(k);
3884 if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3885 allCurColIndsValid = false;
3886 break; // Stop at the first invalid index
3887 }
3888 const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
3889
3890 // The above conversion MUST succeed. Otherwise, the
3891 // current local index is invalid, which means that
3892 // the graph was constructed incorrectly.
3893 if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3894 allCurColIndsValid = false;
3895 break; // Stop at the first invalid index
3896 }
3897 else {
3898 const LO newLclCol = newColMap->getLocalElement (gblCol);
3899 if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3900 localSuffices = false;
3901 break; // Stop at the first invalid index
3902 }
3903 newLclInds1D(k) = newLclCol;
3904 }
3905 } // for each entry in the current row
3906 } // for each locally owned row
3907 }
3908 else { // locally indexed, but no column Map
3909 // This case is only possible if replaceColMap() was called
3910 // with a null argument on the calling process. It's
3911 // possible, but it means that this method can't possibly
3912 // succeed, since we have no way of knowing how to convert
3913 // the current local indices to global indices.
3914 allCurColIndsValid = false;
3915 }
3916 }
3917 else { // globally indexed
3918 // If the graph is globally indexed, we don't need to save
3919 // local indices, but we _do_ need to know whether the current
3920 // global indices are valid in the new column Map. We may
3921 // need to do a getRemoteIndexList call to find this out.
3922 //
3923 // In this case, it doesn't matter whether the graph currently
3924 // has a column Map. We don't need the old column Map to
3925 // convert from global indices to the _new_ column Map's local
3926 // indices. Furthermore, we can use the same code, whether
3927 // the graph is static or dynamic profile.
3928
3929 // Test whether the current global indices are in the new
3930 // column Map on the calling process.
3931 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3932 const RowInfo rowInfo = this->getRowInfo (lclRow);
3933 auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
3934 for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3935 const GO gblCol = oldGblRowView(k);
3936 if (! newColMap->isNodeGlobalElement (gblCol)) {
3937 localSuffices = false;
3938 break; // Stop at the first invalid index
3939 }
3940 } // for each entry in the current row
3941 } // for each locally owned row
3942 } // locally or globally indexed
3943 } // whether indices are allocated
3944
3945 // Do an all-reduce to check both possible error conditions.
3946 int lclSuccess[2];
3947 lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3948 lclSuccess[1] = localSuffices ? 1 : 0;
3949 int gblSuccess[2];
3950 gblSuccess[0] = 0;
3951 gblSuccess[1] = 0;
3952 RCP<const Teuchos::Comm<int> > comm =
3953 getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3954 if (! comm.is_null ()) {
3955 reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3956 }
3957
3958 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3959 gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
3960 " The most likely reason is that the graph is locally indexed, but the "
3961 "column Map is missing (null) on some processes, due to a previous call "
3962 "to replaceColMap().");
3963
3964 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3965 gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
3966 "contains column indices that are in the old column Map, but not in the "
3967 "new column Map (on that process). This method does NOT redistribute "
3968 "data; it does not claim to do the work of an Import or Export operation."
3969 " This means that for all processess, the calling process MUST own all "
3970 "column indices, in both the old column Map and the new column Map. In "
3971 "this case, you will need to do an Import or Export operation to "
3972 "redistribute data.");
3973
3974 // Commit the results.
3975 if (isLocallyIndexed ()) {
3976 { // scope the device view; sortAndMergeAllIndices needs host
3977 typename local_inds_dualv_type::t_dev newLclInds1D_dev(
3978 Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
3979 Kokkos::WithoutInitializing),
3980 newLclInds1D.extent(0));
3981 Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
3982 lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3983 }
3984
3985 // We've reindexed, so we don't know if the indices are sorted.
3986 //
3987 // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3988 // since we're already going through all the indices above. We
3989 // could also sort each row in place; that way, we would only
3990 // have to make one pass over the rows.
3991 indicesAreSorted_ = false;
3992 if (sortIndicesInEachRow) {
3993 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3994 // order to call this method.
3995 //
3996 // FIXME (mfh 17 Sep 2014) This violates the strong exception
3997 // guarantee. It would be better to sort the new index arrays
3998 // before committing them.
3999 const bool sorted = false; // need to resort
4000 const bool merged = true; // no need to merge, since no dups
4001 this->sortAndMergeAllIndices (sorted, merged);
4002 }
4003 }
4004 colMap_ = newColMap;
4005
4006 if (newImport.is_null ()) {
4007 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4008 // check whether the input Import is null on any process.
4009 //
4010 // If the domain Map hasn't been set yet, we can't compute a new
4011 // Import object. Leave it what it is; it should be null, but
4012 // it doesn't matter. If the domain Map _has_ been set, then
4013 // compute a new Import object if necessary.
4014 if (! domainMap_.is_null ()) {
4015 if (! domainMap_->isSameAs (* newColMap)) {
4016 importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4017 } else {
4018 importer_ = Teuchos::null; // don't need an Import
4019 }
4020 }
4021 } else {
4022 // The caller gave us an Import object. Assume that it's valid.
4023 importer_ = newImport;
4024 }
4025 }
4026
4027 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4028 void
4030 replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
4031 {
4032 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
4033 TEUCHOS_TEST_FOR_EXCEPTION(
4034 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4035 "this method unless the graph already has a column Map.");
4036 TEUCHOS_TEST_FOR_EXCEPTION(
4037 newDomainMap.is_null (), std::invalid_argument,
4038 prefix << "The new domain Map must be nonnull.");
4039
4040 // Create a new importer, if needed
4041 Teuchos::RCP<const import_type> newImporter = Teuchos::null;
4042 if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
4043 newImporter = rcp(new import_type(newDomainMap, colMap_));
4044 }
4045 this->replaceDomainMapAndImporter(newDomainMap, newImporter);
4046 }
4047
4048 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4049 void
4051 replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4052 const Teuchos::RCP<const import_type>& newImporter)
4053 {
4054 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4055 TEUCHOS_TEST_FOR_EXCEPTION(
4056 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4057 "this method unless the graph already has a column Map.");
4058 TEUCHOS_TEST_FOR_EXCEPTION(
4059 newDomainMap.is_null (), std::invalid_argument,
4060 prefix << "The new domain Map must be nonnull.");
4061
4062 if (debug_) {
4063 if (newImporter.is_null ()) {
4064 // It's not a good idea to put expensive operations in a macro
4065 // clause, even if they are side effect - free, because macros
4066 // don't promise that they won't evaluate their arguments more
4067 // than once. It's polite for them to do so, but not required.
4068 const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4069 TEUCHOS_TEST_FOR_EXCEPTION
4070 (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4071 "then the new domain Map must be the same as the current column Map.");
4072 }
4073 else {
4074 const bool colSameAsTgt =
4075 colMap_->isSameAs (* (newImporter->getTargetMap ()));
4076 const bool newDomSameAsSrc =
4077 newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4078 TEUCHOS_TEST_FOR_EXCEPTION
4079 (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4080 "new Import is nonnull, then the current column Map must be the same "
4081 "as the new Import's target Map, and the new domain Map must be the "
4082 "same as the new Import's source Map.");
4083 }
4084 }
4085
4086 domainMap_ = newDomainMap;
4087 importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4088 }
4089
4090 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4091 void
4093 replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4094 {
4095 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4096 TEUCHOS_TEST_FOR_EXCEPTION(
4097 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4098 "this method unless the graph already has a row Map.");
4099 TEUCHOS_TEST_FOR_EXCEPTION(
4100 newRangeMap.is_null (), std::invalid_argument,
4101 prefix << "The new range Map must be nonnull.");
4102
4103 // Create a new exporter, if needed
4104 Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4105 if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4106 newExporter = rcp(new export_type(rowMap_, newRangeMap));
4107 }
4108 this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4109 }
4110
4111 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4112 void
4114 replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4115 const Teuchos::RCP<const export_type>& newExporter)
4116 {
4117 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4118 TEUCHOS_TEST_FOR_EXCEPTION(
4119 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4120 "this method unless the graph already has a column Map.");
4121 TEUCHOS_TEST_FOR_EXCEPTION(
4122 newRangeMap.is_null (), std::invalid_argument,
4123 prefix << "The new domain Map must be nonnull.");
4124
4125 if (debug_) {
4126 if (newExporter.is_null ()) {
4127 // It's not a good idea to put expensive operations in a macro
4128 // clause, even if they are side effect - free, because macros
4129 // don't promise that they won't evaluate their arguments more
4130 // than once. It's polite for them to do so, but not required.
4131 const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4132 TEUCHOS_TEST_FOR_EXCEPTION
4133 (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4134 "then the new range Map must be the same as the current row Map.");
4135 }
4136 else {
4137 const bool newRangeSameAsTgt =
4138 newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4139 const bool rowSameAsSrc =
4140 rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4141 TEUCHOS_TEST_FOR_EXCEPTION
4142 (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4143 "new Export is nonnull, then the current row Map must be the same "
4144 "as the new Export's source Map, and the new range Map must be the "
4145 "same as the new Export's target Map.");
4146 }
4147 }
4148
4149 rangeMap_ = newRangeMap;
4150 exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4151 }
4152
4153
4154 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4157 getLocalGraphDevice () const
4158 {
4160 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4161 this->getRowPtrsPackedDevice());
4162 }
4163
4164 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4167 getLocalGraphHost () const
4168 {
4169 return local_graph_host_type(
4170 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4171 this->getRowPtrsPackedHost());
4172 }
4173
4174 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4175 void
4178 {
4180 using Teuchos::ArrayView;
4181 using Teuchos::outArg;
4182 using Teuchos::reduceAll;
4183 typedef global_size_t GST;
4184
4185 ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4186
4187 this->computeLocalConstants ();
4188
4189 // Compute global constants from local constants. Processes that
4190 // already have local constants still participate in the
4191 // all-reduces, using their previously computed values.
4192 if (! this->haveGlobalConstants_) {
4193 const Teuchos::Comm<int>& comm = * (this->getComm ());
4194 // Promote all the nodeNum* and nodeMaxNum* quantities from
4195 // size_t to global_size_t, when doing the all-reduces for
4196 // globalNum* / globalMaxNum* results.
4197 //
4198 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4199 // this in two all-reduces (one for the sum and the other for
4200 // the max), or use a custom MPI_Op that combines the sum and
4201 // the max. The latter might even be slower than two
4202 // all-reduces on modern network hardware. It would also be a
4203 // good idea to use nonblocking all-reduces (MPI 3), so that we
4204 // don't have to wait around for the first one to finish before
4205 // starting the second one.
4206 GST lcl, gbl;
4207 lcl = static_cast<GST> (this->getLocalNumEntries ());
4208
4209 reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4210 this->globalNumEntries_ = gbl;
4211
4212 const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4213 reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4214 outArg (this->globalMaxNumRowEntries_));
4215 this->haveGlobalConstants_ = true;
4216 }
4217 }
4218
4219
4220 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4221 void
4224 {
4226
4227 ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4228 if (this->haveLocalConstants_) {
4229 return;
4230 }
4231
4232 // Reset local properties
4233 this->nodeMaxNumRowEntries_ =
4234 Teuchos::OrdinalTraits<size_t>::invalid();
4235
4236 using LO = local_ordinal_type;
4237
4238 auto ptr = this->getRowPtrsPackedDevice();
4239 const LO lclNumRows = ptr.extent(0) == 0 ?
4240 static_cast<LO> (0) :
4241 (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4242
4243 const LO lclMaxNumRowEnt =
4244 ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4245 ptr, lclNumRows);
4246 this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4247 this->haveLocalConstants_ = true;
4248 }
4249
4250
4251 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4252 std::pair<size_t, std::string>
4254 makeIndicesLocal (const bool verbose)
4255 {
4257 using Teuchos::arcp;
4258 using Teuchos::Array;
4259 using std::endl;
4260 typedef LocalOrdinal LO;
4261 typedef GlobalOrdinal GO;
4262 typedef device_type DT;
4263 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4264 typedef typename num_row_entries_type::non_const_value_type num_ent_type;
4265 const char tfecfFuncName[] = "makeIndicesLocal: ";
4266 ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4267
4268 std::unique_ptr<std::string> prefix;
4269 if (verbose) {
4270 prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4271 std::ostringstream os;
4272 os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
4273 std::cerr << os.str();
4274 }
4275
4276 // These are somewhat global properties, so it's safe to have
4277 // exception checks for them, rather than returning an error code.
4278 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4279 (! this->hasColMap (), std::logic_error, "The graph does not have a "
4280 "column Map yet. This method should never be called in that case. "
4281 "Please report this bug to the Tpetra developers.");
4282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4283 (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4284 "that it has a column Map, because hasColMap() returns true. However, "
4285 "the result of getColMap() is null. This should never happen. Please "
4286 "report this bug to the Tpetra developers.");
4287
4288 // Return value 1: The number of column indices (counting
4289 // duplicates) that could not be converted to local indices,
4290 // because they were not in the column Map on the calling process.
4291 size_t lclNumErrs = 0;
4292 std::ostringstream errStrm; // for return value 2 (error string)
4293
4294 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
4295 const map_type& colMap = * (this->getColMap ());
4296
4297 if (this->isGloballyIndexed () && lclNumRows != 0) {
4298 // This is a host-accessible View.
4299 typename num_row_entries_type::const_type h_numRowEnt =
4300 this->k_numRowEntries_;
4301
4302 auto rowPtrsUnpacked_host = this->getRowPtrsUnpackedHost();
4303
4304 // Allocate space for local indices.
4305 if (rowPtrsUnpacked_host.extent(0) == 0) {
4306 errStrm << "Unpacked row pointers (rowPtrsUnpacked_dev_) has length 0. This should never "
4307 "happen here. Please report this bug to the Tpetra developers."
4308 << endl;
4309 // Need to return early.
4310 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4311 errStrm.str ());
4312 }
4313 const auto numEnt = rowPtrsUnpacked_host(lclNumRows);
4314
4315 // mfh 17 Dec 2016: We don't need initial zero-fill of
4316 // lclIndsUnpacked_wdv, because we will fill it below anyway.
4317 // AllowPadding would only help for aligned access (e.g.,
4318 // for vectorization) if we also were to pad each row to the
4319 // same alignment, so we'll skip AllowPadding for now.
4320
4321 // using Kokkos::AllowPadding;
4322 using Kokkos::view_alloc;
4323 using Kokkos::WithoutInitializing;
4324
4325 // When giving the label as an argument to
4326 // Kokkos::view_alloc, the label must be a string and not a
4327 // char*, else the code won't compile. This is because
4328 // view_alloc also allows a raw pointer as its first
4329 // argument. See
4330 // https://github.com/kokkos/kokkos/issues/434. This is a
4331 // large allocation typically, so the overhead of creating
4332 // an std::string is minor.
4333 const std::string label ("Tpetra::CrsGraph::lclInd");
4334 if (verbose) {
4335 std::ostringstream os;
4336 os << *prefix << "(Re)allocate lclInd_wdv: old="
4337 << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4338 std::cerr << os.str();
4339 }
4340
4341 local_inds_dualv_type lclInds_dualv =
4342 local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4343 numEnt);
4344 lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4345
4346 auto lclColMap = colMap.getLocalMap ();
4347 // This is a "device mirror" of the host View h_numRowEnt.
4348 //
4349 // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4350 // Device instance is to use its default constructor. See the
4351 // following Kokkos issue:
4352 //
4353 // https://github.com/kokkos/kokkos/issues/442
4354 if (verbose) {
4355 std::ostringstream os;
4356 os << *prefix << "Allocate device mirror k_numRowEnt: "
4357 << h_numRowEnt.extent(0) << endl;
4358 std::cerr << os.str();
4359 }
4360 auto k_numRowEnt =
4361 Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4362
4364 lclNumErrs =
4365 convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4366 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4367 gblInds_wdv.getDeviceView(Access::ReadOnly),
4368 this->getRowPtrsUnpackedDevice(),
4369 lclColMap,
4370 k_numRowEnt);
4371 if (lclNumErrs != 0) {
4372 const int myRank = [this] () {
4373 auto map = this->getMap ();
4374 if (map.is_null ()) {
4375 return 0;
4376 }
4377 else {
4378 auto comm = map->getComm ();
4379 return comm.is_null () ? 0 : comm->getRank ();
4380 }
4381 } ();
4382 const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4383 errStrm << "(Process " << myRank << ") When converting column "
4384 "indices from global to local, we encountered " << lclNumErrs
4385 << " ind" << (pluralNumErrs ? "ices" : "ex")
4386 << " that do" << (pluralNumErrs ? "es" : "")
4387 << " not live in the column Map on this process." << endl;
4388 }
4389
4390 // We've converted column indices from global to local, so we
4391 // can deallocate the global column indices (which we know are
4392 // in 1-D storage, because the graph has static profile).
4393 if (verbose) {
4394 std::ostringstream os;
4395 os << *prefix << "Free gblInds_wdv: "
4396 << gblInds_wdv.extent(0) << endl;
4397 std::cerr << os.str();
4398 }
4399 gblInds_wdv = global_inds_wdv_type ();
4400 } // globallyIndexed() && lclNumRows > 0
4401
4402 this->indicesAreLocal_ = true;
4403 this->indicesAreGlobal_ = false;
4404 this->checkInternalState ();
4405
4406 return std::make_pair (lclNumErrs, errStrm.str ());
4407 }
4408
4409 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4410 void
4412 makeColMap (Teuchos::Array<int>& remotePIDs)
4413 {
4415 using std::endl;
4416 const char tfecfFuncName[] = "makeColMap";
4417
4418 ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4419 std::unique_ptr<std::string> prefix;
4420 if (verbose_) {
4421 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4422 std::ostringstream os;
4423 os << *prefix << "Start" << endl;
4424 std::cerr << os.str();
4425 }
4426
4427 // this->colMap_ should be null at this point, but we accept the
4428 // future possibility that it might not be (esp. if we decide
4429 // later to support graph structure changes after first
4430 // fillComplete, which CrsGraph does not currently (as of 12 Feb
4431 // 2017) support).
4432 Teuchos::RCP<const map_type> colMap = this->colMap_;
4433 const bool sortEachProcsGids =
4435
4436 // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4437 // per-process error code. If an error does occur on a process,
4438 // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4439 // notice that error. This is the caller's responsibility. For
4440 // now, we only propagate (to all processes) and report the error
4441 // in debug mode. In the future, we need to add the local/global
4442 // error handling scheme used in BlockCrsMatrix to this class.
4443 if (debug_) {
4444 using Teuchos::outArg;
4445 using Teuchos::REDUCE_MIN;
4446 using Teuchos::reduceAll;
4447
4448 std::ostringstream errStrm;
4449 const int lclErrCode =
4450 Details::makeColMap (colMap, remotePIDs,
4451 getDomainMap (), *this, sortEachProcsGids, &errStrm);
4452 auto comm = this->getComm ();
4453 if (! comm.is_null ()) {
4454 const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4455 int gblSuccess = 0; // output argument
4456 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4457 outArg (gblSuccess));
4458 if (gblSuccess != 1) {
4459 std::ostringstream os;
4460 Details::gathervPrint (os, errStrm.str (), *comm);
4461 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4462 (true, std::runtime_error, ": An error happened on at "
4463 "least one process in the CrsGraph's communicator. "
4464 "Here are all processes' error messages:" << std::endl
4465 << os.str ());
4466 }
4467 }
4468 }
4469 else {
4470 (void) Details::makeColMap (colMap, remotePIDs,
4471 getDomainMap (), *this, sortEachProcsGids, nullptr);
4472 }
4473 // See above. We want to admit the possibility of makeColMap
4474 // actually revising an existing column Map, even though that
4475 // doesn't currently (as of 10 May 2017) happen.
4476 this->colMap_ = colMap;
4477
4479 if (verbose_) {
4480 std::ostringstream os;
4481 os << *prefix << "Done" << endl;
4482 std::cerr << os.str();
4483 }
4484 }
4485
4486
4487 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4488 void
4490 sortAndMergeAllIndices (const bool sorted, const bool merged)
4491 {
4492 using std::endl;
4493 using LO = LocalOrdinal;
4494 using host_execution_space =
4495 typename Kokkos::View<LO*, device_type>::HostMirror::
4496 execution_space;
4497 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4498 const char tfecfFuncName[] = "sortAndMergeAllIndices";
4499 Details::ProfilingRegion regionSortAndMerge
4500 ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4501
4502 std::unique_ptr<std::string> prefix;
4503 if (verbose_) {
4504 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4505 std::ostringstream os;
4506 os << *prefix << "Start: "
4507 << "sorted=" << (sorted ? "true" : "false")
4508 << ", merged=" << (merged ? "true" : "false") << endl;
4509 std::cerr << os.str();
4510 }
4511 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4512 (this->isGloballyIndexed(), std::logic_error,
4513 "This method may only be called after makeIndicesLocal." );
4514 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4515 (! merged && this->isStorageOptimized(), std::logic_error,
4516 "The graph is already storage optimized, so we shouldn't be "
4517 "merging any indices. "
4518 "Please report this bug to the Tpetra developers.");
4519
4520 if (! sorted || ! merged) {
4521 const LO lclNumRows(this->getLocalNumRows());
4522 auto range = range_type(0, lclNumRows);
4523
4524 if (verbose_) {
4525 size_t totalNumDups = 0;
4526 //Sync and mark-modified the local indices before disabling WDV tracking
4527 lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4529 Kokkos::parallel_reduce(range,
4530 [this, sorted, merged] (const LO lclRow, size_t& numDups)
4531 {
4532 const RowInfo rowInfo = this->getRowInfo(lclRow);
4533 numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4534 },
4535 totalNumDups);
4537 std::ostringstream os;
4538 os << *prefix << "totalNumDups=" << totalNumDups << endl;
4539 std::cerr << os.str();
4540 }
4541 else {
4542 //Sync and mark-modified the local indices before disabling WDV tracking
4543 lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
4545 Kokkos::parallel_for(range,
4546 [this, sorted, merged] (const LO lclRow)
4547 {
4548 const RowInfo rowInfo = this->getRowInfo(lclRow);
4549 this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4550 });
4552 }
4553 this->indicesAreSorted_ = true; // we just sorted every row
4554 this->noRedundancies_ = true; // we just merged every row
4555 }
4556
4557 if (verbose_) {
4558 std::ostringstream os;
4559 os << *prefix << "Done" << endl;
4560 std::cerr << os.str();
4561 }
4562 }
4563
4564 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4565 void
4567 makeImportExport (Teuchos::Array<int>& remotePIDs,
4568 const bool useRemotePIDs)
4569 {
4571 using Teuchos::ParameterList;
4572 using Teuchos::RCP;
4573 using Teuchos::rcp;
4574 const char tfecfFuncName[] = "makeImportExport: ";
4575 ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4576
4577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4578 (! this->hasColMap (), std::logic_error,
4579 "This method may not be called unless the graph has a column Map.");
4580 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4581
4582 // Don't do any checks to see if we need to create the Import, if
4583 // it exists already.
4584 //
4585 // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4586 // change CrsGraph in the future to allow changing the column
4587 // Map after fillComplete. For now, the column Map is fixed
4588 // after the first fillComplete call.
4589 if (importer_.is_null ()) {
4590 // Create the Import instance if necessary.
4591 if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4592 if (params.is_null () || ! params->isSublist ("Import")) {
4593 if (useRemotePIDs) {
4594 importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4595 }
4596 else {
4597 importer_ = rcp (new import_type (domainMap_, colMap_));
4598 }
4599 }
4600 else {
4601 RCP<ParameterList> importSublist = sublist (params, "Import", true);
4602 if (useRemotePIDs) {
4603 RCP<import_type> newImp =
4604 rcp (new import_type (domainMap_, colMap_, remotePIDs,
4605 importSublist));
4606 importer_ = newImp;
4607 }
4608 else {
4609 importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4610 }
4611 }
4612 }
4613 }
4614
4615 // Don't do any checks to see if we need to create the Export, if
4616 // it exists already.
4617 if (exporter_.is_null ()) {
4618 // Create the Export instance if necessary.
4619 if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4620 if (params.is_null () || ! params->isSublist ("Export")) {
4621 exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4622 }
4623 else {
4624 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4625 exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4626 }
4627 }
4628 }
4629 }
4630
4631
4632 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4633 std::string
4635 description () const
4636 {
4637 std::ostringstream oss;
4639 if (isFillComplete ()) {
4640 oss << "{status = fill complete"
4641 << ", global rows = " << getGlobalNumRows()
4642 << ", global cols = " << getGlobalNumCols()
4643 << ", global num entries = " << getGlobalNumEntries()
4644 << "}";
4645 }
4646 else {
4647 oss << "{status = fill not complete"
4648 << ", global rows = " << getGlobalNumRows()
4649 << "}";
4650 }
4651 return oss.str();
4652 }
4653
4654
4655 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4656 void
4658 describe (Teuchos::FancyOStream &out,
4659 const Teuchos::EVerbosityLevel verbLevel) const
4660 {
4661 using Teuchos::ArrayView;
4662 using Teuchos::Comm;
4663 using Teuchos::RCP;
4664 using Teuchos::VERB_DEFAULT;
4665 using Teuchos::VERB_NONE;
4666 using Teuchos::VERB_LOW;
4667 using Teuchos::VERB_MEDIUM;
4668 using Teuchos::VERB_HIGH;
4669 using Teuchos::VERB_EXTREME;
4670 using std::endl;
4671 using std::setw;
4672
4673 Teuchos::EVerbosityLevel vl = verbLevel;
4674 if (vl == VERB_DEFAULT) vl = VERB_LOW;
4675 RCP<const Comm<int> > comm = this->getComm();
4676 const int myImageID = comm->getRank(),
4677 numImages = comm->getSize();
4678 size_t width = 1;
4679 for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4680 ++width;
4681 }
4682 width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4683 Teuchos::OSTab tab (out);
4684 // none: print nothing
4685 // low: print O(1) info from node 0
4686 // medium: print O(P) info, num entries per node
4687 // high: print O(N) info, num entries per row
4688 // extreme: print O(NNZ) info: print graph indices
4689 //
4690 // for medium and higher, print constituent objects at specified verbLevel
4691 if (vl != VERB_NONE) {
4692 if (myImageID == 0) out << this->description() << std::endl;
4693 // O(1) globals, minus what was already printed by description()
4694 if (isFillComplete() && myImageID == 0) {
4695 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4696 }
4697 // constituent objects
4698 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4699 if (myImageID == 0) out << "\nRow map: " << std::endl;
4700 rowMap_->describe(out,vl);
4701 if (colMap_ != Teuchos::null) {
4702 if (myImageID == 0) out << "\nColumn map: " << std::endl;
4703 colMap_->describe(out,vl);
4704 }
4705 if (domainMap_ != Teuchos::null) {
4706 if (myImageID == 0) out << "\nDomain map: " << std::endl;
4707 domainMap_->describe(out,vl);
4708 }
4709 if (rangeMap_ != Teuchos::null) {
4710 if (myImageID == 0) out << "\nRange map: " << std::endl;
4711 rangeMap_->describe(out,vl);
4712 }
4713 }
4714 // O(P) data
4715 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4716 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4717 if (myImageID == imageCtr) {
4718 out << "Node ID = " << imageCtr << std::endl
4719 << "Node number of entries = " << this->getLocalNumEntries () << std::endl
4720 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4721 if (! indicesAreAllocated ()) {
4722 out << "Indices are not allocated." << std::endl;
4723 }
4724 }
4725 comm->barrier();
4726 comm->barrier();
4727 comm->barrier();
4728 }
4729 }
4730 // O(N) and O(NNZ) data
4731 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4732 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4733 if (myImageID == imageCtr) {
4734 out << std::setw(width) << "Node ID"
4735 << std::setw(width) << "Global Row"
4736 << std::setw(width) << "Num Entries";
4737 if (vl == VERB_EXTREME) {
4738 out << " Entries";
4739 }
4740 out << std::endl;
4741 const LocalOrdinal lclNumRows =
4742 static_cast<LocalOrdinal> (this->getLocalNumRows ());
4743 for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4744 const RowInfo rowinfo = this->getRowInfo (r);
4745 GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4746 out << std::setw(width) << myImageID
4747 << std::setw(width) << gid
4748 << std::setw(width) << rowinfo.numEntries;
4749 if (vl == VERB_EXTREME) {
4750 out << " ";
4751 if (isGloballyIndexed()) {
4752 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4753 for (size_t j=0; j < rowinfo.numEntries; ++j){
4754 GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4755 out << colgid << " ";
4756 }
4757 }
4758 else if (isLocallyIndexed()) {
4759 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4760 for (size_t j=0; j < rowinfo.numEntries; ++j) {
4761 LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4762 out << colMap_->getGlobalElement(collid) << " ";
4763 }
4764 }
4765 }
4766 out << std::endl;
4767 }
4768 }
4769 comm->barrier();
4770 comm->barrier();
4771 comm->barrier();
4772 }
4773 }
4774 }
4775 }
4776
4777
4778 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4779 bool
4781 checkSizes (const SrcDistObject& /* source */)
4782 {
4783 // It's not clear what kind of compatibility checks on sizes can
4784 // be performed here. Epetra_CrsGraph doesn't check any sizes for
4785 // compatibility.
4786 return true;
4787 }
4788
4789 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4790 void
4793 (const SrcDistObject& source,
4794 const size_t numSameIDs,
4795 const Kokkos::DualView<const local_ordinal_type*,
4796 buffer_device_type>& permuteToLIDs,
4797 const Kokkos::DualView<const local_ordinal_type*,
4798 buffer_device_type>& permuteFromLIDs,
4799 const CombineMode /*CM*/)
4800 {
4801 using std::endl;
4802 using LO = local_ordinal_type;
4803 using GO = global_ordinal_type;
4804 using this_CRS_type = CrsGraph<LO, GO, node_type>;
4805 const char tfecfFuncName[] = "copyAndPermute: ";
4806 const bool verbose = verbose_;
4807
4808 std::unique_ptr<std::string> prefix;
4809 if (verbose) {
4810 prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4811 std::ostringstream os;
4812 os << *prefix << endl;
4813 std::cerr << os.str ();
4814 }
4815
4816 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4817 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4818 std::runtime_error, "permuteToLIDs.extent(0) = "
4819 << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4820 << permuteFromLIDs.extent (0) << ".");
4821
4822 // We know from checkSizes that the source object is a
4823 // row_graph_type, so we don't need to check again.
4824 const row_graph_type& srcRowGraph =
4825 dynamic_cast<const row_graph_type&> (source);
4826
4827 if (verbose) {
4828 std::ostringstream os;
4829 os << *prefix << "Compute padding" << endl;
4830 std::cerr << os.str ();
4831 }
4832 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4833 permuteToLIDs, permuteFromLIDs, verbose);
4834 applyCrsPadding(*padding, verbose);
4835
4836 // If the source object is actually a CrsGraph, we can use view
4837 // mode instead of copy mode to access the entries in each row,
4838 // if the graph is not fill complete.
4839 const this_CRS_type* srcCrsGraph =
4840 dynamic_cast<const this_CRS_type*> (&source);
4841
4842 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4843 const map_type& tgtRowMap = *(getRowMap());
4844 const bool src_filled = srcRowGraph.isFillComplete();
4845 nonconst_global_inds_host_view_type row_copy;
4846 LO myid = 0;
4847
4848 //
4849 // "Copy" part of "copy and permute."
4850 //
4851 if (src_filled || srcCrsGraph == nullptr) {
4852 if (verbose) {
4853 std::ostringstream os;
4854 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4855 std::cerr << os.str ();
4856 }
4857 // If the source graph is fill complete, we can't use view mode,
4858 // because the data might be stored in a different format not
4859 // compatible with the expectations of view mode. Also, if the
4860 // source graph is not a CrsGraph, we can't use view mode,
4861 // because RowGraph only provides copy mode access to the data.
4862 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4863 const GO gid = srcRowMap.getGlobalElement (myid);
4864 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4865 Kokkos::resize(row_copy,row_length);
4866 size_t check_row_length = 0;
4867 srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4868 this->insertGlobalIndices (gid, row_length, row_copy.data());
4869 }
4870 } else {
4871 if (verbose) {
4872 std::ostringstream os;
4873 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4874 std::cerr << os.str ();
4875 }
4876 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4877 const GO gid = srcRowMap.getGlobalElement (myid);
4878 global_inds_host_view_type row;
4879 srcCrsGraph->getGlobalRowView (gid, row);
4880 this->insertGlobalIndices (gid, row.extent(0), row.data());
4881 }
4882 }
4883
4884 //
4885 // "Permute" part of "copy and permute."
4886 //
4887 auto permuteToLIDs_h = permuteToLIDs.view_host ();
4888 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4889
4890 if (src_filled || srcCrsGraph == nullptr) {
4891 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4892 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4893 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4894 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4895 Kokkos::resize(row_copy,row_length);
4896 size_t check_row_length = 0;
4897 srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4898 this->insertGlobalIndices (mygid, row_length, row_copy.data());
4899 }
4900 } else {
4901 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4902 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4903 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4904 global_inds_host_view_type row;
4905 srcCrsGraph->getGlobalRowView (srcgid, row);
4906 this->insertGlobalIndices (mygid, row.extent(0), row.data());
4907 }
4908 }
4909
4910 if (verbose) {
4911 std::ostringstream os;
4912 os << *prefix << "Done" << endl;
4913 std::cerr << os.str ();
4914 }
4915 }
4916
4917 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4918 void
4920 applyCrsPadding(const padding_type& padding,
4921 const bool verbose)
4922 {
4925 using std::endl;
4926 using LO = local_ordinal_type;
4927 using row_ptrs_type =
4928 typename local_graph_device_type::row_map_type::non_const_type;
4929 using range_policy =
4930 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4931 const char tfecfFuncName[] = "applyCrsPadding";
4932 ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4933
4934 std::unique_ptr<std::string> prefix;
4935 if (verbose) {
4936 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4937 std::ostringstream os;
4938 os << *prefix << "padding: ";
4939 padding.print(os);
4940 os << endl;
4941 std::cerr << os.str();
4942 }
4943 const int myRank = ! verbose ? -1 : [&] () {
4944 auto map = this->getMap();
4945 if (map.is_null()) {
4946 return -1;
4947 }
4948 auto comm = map->getComm();
4949 if (comm.is_null()) {
4950 return -1;
4951 }
4952 return comm->getRank();
4953 } ();
4954
4955 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4956 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4957 // size needs to increase. That should be the job of
4958 // padCrsArrays.
4959
4960 // Assume global indexing we don't have any indices yet
4961 if (! indicesAreAllocated()) {
4962 if (verbose) {
4963 std::ostringstream os;
4964 os << *prefix << "Call allocateIndices" << endl;
4965 std::cerr << os.str();
4966 }
4967 allocateIndices(GlobalIndices, verbose);
4968 }
4969 TEUCHOS_ASSERT( indicesAreAllocated() );
4970
4971 // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4972 // would use it directly.
4973
4974 auto rowPtrsUnpacked_dev = this->getRowPtrsUnpackedDevice();
4975 if (verbose) {
4976 std::ostringstream os;
4977 os << *prefix << "Allocate row_ptrs_beg: "
4978 << rowPtrsUnpacked_dev.extent(0) << endl;
4979 std::cerr << os.str();
4980 }
4981 using Kokkos::view_alloc;
4982 using Kokkos::WithoutInitializing;
4983 row_ptrs_type row_ptrs_beg(
4984 view_alloc("row_ptrs_beg", WithoutInitializing),
4985 rowPtrsUnpacked_dev.extent(0));
4986 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4987 Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev);
4988
4989 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4990 size_t(row_ptrs_beg.extent(0) - 1);
4991 if (verbose) {
4992 std::ostringstream os;
4993 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4994 std::cerr << os.str();
4995 }
4996 row_ptrs_type row_ptrs_end(
4997 view_alloc("row_ptrs_end", WithoutInitializing), N);
4998 row_ptrs_type num_row_entries;
4999
5000 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
5001
5002 execution_space().fence(); // we need above deep_copy to be done
5003
5004 if (refill_num_row_entries) { // Case 1: Unpacked storage
5005 // We can't assume correct *this capture until C++17, and it's
5006 // likely more efficient just to capture what we need anyway.
5007 num_row_entries =
5008 row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
5009 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
5010 Kokkos::parallel_for
5011 ("Fill end row pointers", range_policy(0, N),
5012 KOKKOS_LAMBDA (const size_t i) {
5013 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5014 });
5015 }
5016 else {
5017 // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
5018 // storage, we don't need row_ptr_end to be separate allocation;
5019 // could just have it alias row_ptr_beg+1.
5020 Kokkos::parallel_for
5021 ("Fill end row pointers", range_policy(0, N),
5022 KOKKOS_LAMBDA (const size_t i) {
5023 row_ptrs_end(i) = row_ptrs_beg(i+1);
5024 });
5025 }
5026
5027 if (isGloballyIndexed()) {
5028 padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
5029 padding, myRank, verbose);
5030 }
5031 else {
5032 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
5033 padding, myRank, verbose);
5034 }
5035
5036 if (refill_num_row_entries) {
5037 Kokkos::parallel_for
5038 ("Fill num entries", range_policy(0, N),
5039 KOKKOS_LAMBDA (const size_t i) {
5040 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5041 });
5042 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5043 }
5044 if (verbose) {
5045 std::ostringstream os;
5046 os << *prefix << "Reassign k_rowPtrs_; old size: "
5047 << rowPtrsUnpacked_dev.extent(0) << ", new size: "
5048 << row_ptrs_beg.extent(0) << endl;
5049 std::cerr << os.str();
5050 TEUCHOS_ASSERT( rowPtrsUnpacked_dev.extent(0) == row_ptrs_beg.extent(0) );
5051 }
5052
5053 setRowPtrsUnpacked(row_ptrs_beg);
5054 }
5055
5056 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5057 std::unique_ptr<
5058 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5059 >
5063 const size_t numSameIDs,
5064 const Kokkos::DualView<const local_ordinal_type*,
5065 buffer_device_type>& permuteToLIDs,
5066 const Kokkos::DualView<const local_ordinal_type*,
5067 buffer_device_type>& permuteFromLIDs,
5068 const bool verbose) const
5069 {
5070 using LO = local_ordinal_type;
5071 using std::endl;
5072
5073 std::unique_ptr<std::string> prefix;
5074 if (verbose) {
5075 prefix = this->createPrefix("CrsGraph",
5076 "computeCrsPadding(same & permute)");
5077 std::ostringstream os;
5078 os << *prefix << "{numSameIDs: " << numSameIDs
5079 << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5080 << endl;
5081 std::cerr << os.str();
5082 }
5083
5084 const int myRank = [&] () {
5085 auto comm = rowMap_.is_null() ? Teuchos::null :
5086 rowMap_->getComm();
5087 return comm.is_null() ? -1 : comm->getRank();
5088 } ();
5089 std::unique_ptr<padding_type> padding(
5090 new padding_type(myRank, numSameIDs,
5091 permuteFromLIDs.extent(0)));
5092
5093 computeCrsPaddingForSameIDs(*padding, source,
5094 static_cast<LO>(numSameIDs));
5095 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5096 permuteFromLIDs);
5097 return padding;
5098 }
5099
5100 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5101 void
5104 padding_type& padding,
5106 node_type>& source,
5107 const local_ordinal_type numSameIDs) const
5108 {
5109 using LO = local_ordinal_type;
5110 using GO = global_ordinal_type;
5111 using Details::Impl::getRowGraphGlobalRow;
5112 using std::endl;
5113 const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5114
5115 std::unique_ptr<std::string> prefix;
5116 const bool verbose = verbose_;
5117 if (verbose) {
5118 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5119 std::ostringstream os;
5120 os << *prefix << "numSameIDs: " << numSameIDs << endl;
5121 std::cerr << os.str();
5122 }
5123
5124 if (numSameIDs == 0) {
5125 return;
5126 }
5127
5128 const map_type& srcRowMap = *(source.getRowMap());
5129 const map_type& tgtRowMap = *rowMap_;
5130 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5131 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5132 const bool src_is_unique =
5133 srcCrs == nullptr ? false : srcCrs->isMerged();
5134 const bool tgt_is_unique = this->isMerged();
5135
5136 std::vector<GO> srcGblColIndsScratch;
5137 std::vector<GO> tgtGblColIndsScratch;
5138
5139 execute_sync_host_uvm_access(); // protect host UVM access
5140 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5141 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5142 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5143 auto srcGblColInds = getRowGraphGlobalRow(
5144 srcGblColIndsScratch, source, srcGblRowInd);
5145 auto tgtGblColInds = getRowGraphGlobalRow(
5146 tgtGblColIndsScratch, *this, tgtGblRowInd);
5147 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5148 tgtGblColInds.size(), tgt_is_unique,
5149 srcGblColInds.getRawPtr(),
5150 srcGblColInds.size(), src_is_unique);
5151 }
5152 if (verbose) {
5153 std::ostringstream os;
5154 os << *prefix << "Done" << endl;
5155 std::cerr << os.str();
5156 }
5157 }
5158
5159 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5160 void
5163 padding_type& padding,
5165 node_type>& source,
5166 const Kokkos::DualView<const local_ordinal_type*,
5167 buffer_device_type>& permuteToLIDs,
5168 const Kokkos::DualView<const local_ordinal_type*,
5169 buffer_device_type>& permuteFromLIDs) const
5170 {
5171 using LO = local_ordinal_type;
5172 using GO = global_ordinal_type;
5173 using Details::Impl::getRowGraphGlobalRow;
5174 using std::endl;
5175 const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5176
5177 std::unique_ptr<std::string> prefix;
5178 const bool verbose = verbose_;
5179 if (verbose) {
5180 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5181 std::ostringstream os;
5182 os << *prefix << "permuteToLIDs.extent(0): "
5183 << permuteToLIDs.extent(0)
5184 << ", permuteFromLIDs.extent(0): "
5185 << permuteFromLIDs.extent(0) << endl;
5186 std::cerr << os.str();
5187 }
5188
5189 if (permuteToLIDs.extent(0) == 0) {
5190 return;
5191 }
5192
5193 const map_type& srcRowMap = *(source.getRowMap());
5194 const map_type& tgtRowMap = *rowMap_;
5195 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5196 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5197 const bool src_is_unique =
5198 srcCrs == nullptr ? false : srcCrs->isMerged();
5199 const bool tgt_is_unique = this->isMerged();
5200
5201 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5202 auto permuteToLIDs_h = permuteToLIDs.view_host();
5203 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5204 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5205
5206 std::vector<GO> srcGblColIndsScratch;
5207 std::vector<GO> tgtGblColIndsScratch;
5208 const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5209
5210 execute_sync_host_uvm_access(); // protect host UVM access
5211 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5212 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5213 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5214 auto srcGblColInds = getRowGraphGlobalRow(
5215 srcGblColIndsScratch, source, srcGblRowInd);
5216 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5217 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5218 auto tgtGblColInds = getRowGraphGlobalRow(
5219 tgtGblColIndsScratch, *this, tgtGblRowInd);
5220 padding.update_permute(whichPermute, tgtLclRowInd,
5221 tgtGblColInds.getRawPtr(),
5222 tgtGblColInds.size(), tgt_is_unique,
5223 srcGblColInds.getRawPtr(),
5224 srcGblColInds.size(), src_is_unique);
5225 }
5226
5227 if (verbose) {
5228 std::ostringstream os;
5229 os << *prefix << "Done" << endl;
5230 std::cerr << os.str();
5231 }
5232 }
5233
5234 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5235 std::unique_ptr<
5236 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5237 >
5240 const Kokkos::DualView<const local_ordinal_type*,
5241 buffer_device_type>& importLIDs,
5242 Kokkos::DualView<packet_type*, buffer_device_type> imports,
5243 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5244 const bool verbose) const
5245 {
5246 using Details::Impl::getRowGraphGlobalRow;
5247 using std::endl;
5248 using LO = local_ordinal_type;
5249 using GO = global_ordinal_type;
5250 const char tfecfFuncName[] = "computeCrsPaddingForImports";
5251
5252 std::unique_ptr<std::string> prefix;
5253 if (verbose) {
5254 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5255 std::ostringstream os;
5256 os << *prefix << "importLIDs.extent(0): "
5257 << importLIDs.extent(0)
5258 << ", imports.extent(0): "
5259 << imports.extent(0)
5260 << ", numPacketsPerLID.extent(0): "
5261 << numPacketsPerLID.extent(0) << endl;
5262 std::cerr << os.str();
5263 }
5264
5265 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5266 const int myRank = [&] () {
5267 auto comm = rowMap_.is_null() ? Teuchos::null :
5268 rowMap_->getComm();
5269 return comm.is_null() ? -1 : comm->getRank();
5270 } ();
5271 std::unique_ptr<padding_type> padding(
5272 new padding_type(myRank, numImports));
5273
5274 if (imports.need_sync_host()) {
5275 imports.sync_host();
5276 }
5277 auto imports_h = imports.view_host();
5278 if (numPacketsPerLID.need_sync_host ()) {
5279 numPacketsPerLID.sync_host();
5280 }
5281 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5282
5283 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5284 auto importLIDs_h = importLIDs.view_host();
5285
5286 const map_type& tgtRowMap = *rowMap_;
5287 // Always merge source column indices, since isMerged() is
5288 // per-process state, and we don't know its value on other
5289 // processes that sent us data.
5290 constexpr bool src_is_unique = false;
5291 const bool tgt_is_unique = isMerged();
5292
5293 std::vector<GO> tgtGblColIndsScratch;
5294 size_t offset = 0;
5295 execute_sync_host_uvm_access(); // protect host UVM access
5296 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5297 // CrsGraph packs just global column indices, while CrsMatrix
5298 // packs bytes (first the number of entries in the row, then the
5299 // global column indices, then other stuff like the matrix
5300 // values in that row).
5301 const LO origSrcNumEnt =
5302 static_cast<LO>(numPacketsPerLID_h[whichImport]);
5303 GO* const srcGblColInds = imports_h.data() + offset;
5304
5305 const LO tgtLclRowInd = importLIDs_h[whichImport];
5306 const GO tgtGblRowInd =
5307 tgtRowMap.getGlobalElement(tgtLclRowInd);
5308 auto tgtGblColInds = getRowGraphGlobalRow(
5309 tgtGblColIndsScratch, *this, tgtGblRowInd);
5310 const size_t origTgtNumEnt(tgtGblColInds.size());
5311
5312 padding->update_import(whichImport, tgtLclRowInd,
5313 tgtGblColInds.getRawPtr(),
5314 origTgtNumEnt, tgt_is_unique,
5315 srcGblColInds,
5316 origSrcNumEnt, src_is_unique);
5317 offset += origSrcNumEnt;
5318 }
5319
5320 if (verbose) {
5321 std::ostringstream os;
5322 os << *prefix << "Done" << endl;
5323 std::cerr << os.str();
5324 }
5325 return padding;
5326 }
5327
5328 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5329 std::unique_ptr<
5330 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5331 >
5334 const Kokkos::DualView<const local_ordinal_type*,
5335 buffer_device_type>& importLIDs,
5336 Kokkos::DualView<char*, buffer_device_type> imports,
5337 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5338 const bool verbose) const
5339 {
5340 using Details::Impl::getRowGraphGlobalRow;
5341 using Details::PackTraits;
5342 using std::endl;
5343 using LO = local_ordinal_type;
5344 using GO = global_ordinal_type;
5345 const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5346
5347 std::unique_ptr<std::string> prefix;
5348 if (verbose) {
5349 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5350 std::ostringstream os;
5351 os << *prefix << "importLIDs.extent(0): "
5352 << importLIDs.extent(0)
5353 << ", imports.extent(0): "
5354 << imports.extent(0)
5355 << ", numPacketsPerLID.extent(0): "
5356 << numPacketsPerLID.extent(0) << endl;
5357 std::cerr << os.str();
5358 }
5359 const bool extraVerbose =
5360 verbose && Details::Behavior::verbose("CrsPadding");
5361
5362 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5363 TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5364 const int myRank = [&] () {
5365 auto comm = rowMap_.is_null() ? Teuchos::null :
5366 rowMap_->getComm();
5367 return comm.is_null() ? -1 : comm->getRank();
5368 } ();
5369 std::unique_ptr<padding_type> padding(
5370 new padding_type(myRank, numImports));
5371
5372 if (imports.need_sync_host()) {
5373 imports.sync_host();
5374 }
5375 auto imports_h = imports.view_host();
5376 if (numPacketsPerLID.need_sync_host ()) {
5377 numPacketsPerLID.sync_host();
5378 }
5379 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5380
5381 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5382 auto importLIDs_h = importLIDs.view_host();
5383
5384 const map_type& tgtRowMap = *rowMap_;
5385 // Always merge source column indices, since isMerged() is
5386 // per-process state, and we don't know its value on other
5387 // processes that sent us data.
5388 constexpr bool src_is_unique = false;
5389 const bool tgt_is_unique = isMerged();
5390
5391 std::vector<GO> srcGblColIndsScratch;
5392 std::vector<GO> tgtGblColIndsScratch;
5393 size_t offset = 0;
5394 execute_sync_host_uvm_access(); // protect host UVM access
5395 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5396 // CrsGraph packs just global column indices, while CrsMatrix
5397 // packs bytes (first the number of entries in the row, then the
5398 // global column indices, then other stuff like the matrix
5399 // values in that row).
5400 const size_t numBytes = numPacketsPerLID_h[whichImport];
5401 if (extraVerbose) {
5402 std::ostringstream os;
5403 os << *prefix << "whichImport=" << whichImport
5404 << ", numImports=" << numImports
5405 << ", numBytes=" << numBytes << endl;
5406 std::cerr << os.str();
5407 }
5408 if (numBytes == 0) {
5409 continue; // special case: no entries to unpack for this row
5410 }
5411 LO origSrcNumEnt = 0;
5412 const size_t numEntBeg = offset;
5413 const size_t numEntLen =
5414 PackTraits<LO>::packValueCount(origSrcNumEnt);
5415 TEUCHOS_ASSERT( numBytes >= numEntLen );
5416 TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5417 PackTraits<LO>::unpackValue(origSrcNumEnt,
5418 imports_h.data() + numEntBeg);
5419 if (extraVerbose) {
5420 std::ostringstream os;
5421 os << *prefix << "whichImport=" << whichImport
5422 << ", numImports=" << numImports
5423 << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5424 std::cerr << os.str();
5425 }
5426 TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5427 TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5428 const size_t gidsBeg = numEntBeg + numEntLen;
5429 if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5430 srcGblColIndsScratch.resize(origSrcNumEnt);
5431 }
5432 GO* const srcGblColInds = srcGblColIndsScratch.data();
5433 PackTraits<GO>::unpackArray(srcGblColInds,
5434 imports_h.data() + gidsBeg,
5435 origSrcNumEnt);
5436 const LO tgtLclRowInd = importLIDs_h[whichImport];
5437 const GO tgtGblRowInd =
5438 tgtRowMap.getGlobalElement(tgtLclRowInd);
5439 auto tgtGblColInds = getRowGraphGlobalRow(
5440 tgtGblColIndsScratch, *this, tgtGblRowInd);
5441 const size_t origNumTgtEnt(tgtGblColInds.size());
5442
5443 if (extraVerbose) {
5444 std::ostringstream os;
5445 os << *prefix << "whichImport=" << whichImport
5446 << ", numImports=" << numImports
5447 << ": Call padding->update_import" << endl;
5448 std::cerr << os.str();
5449 }
5450 padding->update_import(whichImport, tgtLclRowInd,
5451 tgtGblColInds.getRawPtr(),
5452 origNumTgtEnt, tgt_is_unique,
5453 srcGblColInds,
5454 origSrcNumEnt, src_is_unique);
5455 offset += numBytes;
5456 }
5457
5458 if (verbose) {
5459 std::ostringstream os;
5460 os << *prefix << "Done" << endl;
5461 std::cerr << os.str();
5462 }
5463 return padding;
5464 }
5465
5466 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5467 void
5470 (const SrcDistObject& source,
5471 const Kokkos::DualView<const local_ordinal_type*,
5472 buffer_device_type>& exportLIDs,
5473 Kokkos::DualView<packet_type*,
5474 buffer_device_type>& exports,
5475 Kokkos::DualView<size_t*,
5476 buffer_device_type> numPacketsPerLID,
5477 size_t& constantNumPackets)
5478 {
5479 using Tpetra::Details::ProfilingRegion;
5480 using GO = global_ordinal_type;
5481 using std::endl;
5482 using crs_graph_type =
5484 const char tfecfFuncName[] = "packAndPrepare: ";
5485 ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5486
5487 const bool verbose = verbose_;
5488 std::unique_ptr<std::string> prefix;
5489 if (verbose) {
5490 prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5491 std::ostringstream os;
5492 os << *prefix << "Start" << endl;
5493 std::cerr << os.str();
5494 }
5495
5496 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5497 (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5498 std::runtime_error,
5499 "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5500 << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5501 << ".");
5502 const row_graph_type* srcRowGraphPtr =
5503 dynamic_cast<const row_graph_type*> (&source);
5504 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5505 (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5506 "or Import operation to a CrsGraph must be a RowGraph with the same "
5507 "template parameters.");
5508 // We don't check whether src_graph has had fillComplete called,
5509 // because it doesn't matter whether the *source* graph has been
5510 // fillComplete'd. The target graph can not be fillComplete'd yet.
5511 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5512 (this->isFillComplete (), std::runtime_error,
5513 "The target graph of an Import or Export must not be fill complete.");
5514
5515 const crs_graph_type* srcCrsGraphPtr =
5516 dynamic_cast<const crs_graph_type*> (&source);
5517
5518 if (srcCrsGraphPtr == nullptr) {
5519 using Teuchos::ArrayView;
5520 using LO = local_ordinal_type;
5521
5522 if (verbose) {
5523 std::ostringstream os;
5524 os << *prefix << "Source is a RowGraph but not a CrsGraph"
5525 << endl;
5526 std::cerr << os.str();
5527 }
5528 // RowGraph::pack serves the "old" DistObject interface. It
5529 // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5530 // entails deep-copying the exports buffer on output. RowGraph
5531 // is a convenience interface when not a CrsGraph, so we accept
5532 // the performance hit.
5533 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5534 auto exportLIDs_h = exportLIDs.view_host ();
5535 ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5536 exportLIDs_h.extent (0));
5537 Teuchos::Array<GO> exports_a;
5538
5539 numPacketsPerLID.clear_sync_state ();
5540 numPacketsPerLID.modify_host ();
5541 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5542 ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5543 numPacketsPerLID_h.extent (0));
5544 srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5545 constantNumPackets);
5546 const size_t newSize = static_cast<size_t> (exports_a.size ());
5547 if (static_cast<size_t> (exports.extent (0)) != newSize) {
5548 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5549 exports = exports_dv_type ("exports", newSize);
5550 }
5551 Kokkos::View<const packet_type*, Kokkos::HostSpace,
5552 Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5553 exports.clear_sync_state ();
5554 exports.modify_host ();
5555 // DEEP_COPY REVIEW - NOT TESTED
5556 Kokkos::deep_copy (exports.view_host (), exports_a_h);
5557 }
5558 // packCrsGraphNew requires k_rowPtrsPacked_ to be set
5559 else if (! getColMap ().is_null () &&
5560 (this->getRowPtrsPackedDevice().extent (0) != 0 ||
5561 getRowMap ()->getLocalNumElements () == 0)) {
5562 if (verbose) {
5563 std::ostringstream os;
5564 os << *prefix << "packCrsGraphNew path" << endl;
5565 std::cerr << os.str();
5566 }
5567 using export_pids_type =
5568 Kokkos::DualView<const int*, buffer_device_type>;
5569 export_pids_type exportPIDs; // not filling it; needed for syntax
5570 using LO = local_ordinal_type;
5571 using NT = node_type;
5573 packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5574 exports, numPacketsPerLID,
5575 constantNumPackets, false);
5576 }
5577 else {
5578 srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5579 constantNumPackets);
5580 }
5581
5582 if (verbose) {
5583 std::ostringstream os;
5584 os << *prefix << "Done" << endl;
5585 std::cerr << os.str();
5586 }
5587 }
5588
5589 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5590 void
5592 pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5593 Teuchos::Array<GlobalOrdinal>& exports,
5594 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5595 size_t& constantNumPackets) const
5596 {
5597 auto col_map = this->getColMap();
5598 // packCrsGraph requires k_rowPtrsPacked to be set
5599 if( !col_map.is_null() && (this->getRowPtrsPackedDevice().extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
5601 packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5602 exportLIDs, constantNumPackets);
5603 }
5604 else {
5605 this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5606 constantNumPackets);
5607 }
5608 }
5609
5610 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5611 void
5613 packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5614 Teuchos::Array<GlobalOrdinal>& exports,
5615 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5616 size_t& constantNumPackets) const
5617 {
5618 using std::endl;
5619 using LO = LocalOrdinal;
5620 using GO = GlobalOrdinal;
5621 using host_execution_space =
5622 typename Kokkos::View<size_t*, device_type>::
5623 HostMirror::execution_space;
5624 const char tfecfFuncName[] = "packFillActive: ";
5625 const bool verbose = verbose_;
5626
5627 const auto numExportLIDs = exportLIDs.size ();
5628 std::unique_ptr<std::string> prefix;
5629 if (verbose) {
5630 prefix = this->createPrefix("CrsGraph", "allocateIndices");
5631 std::ostringstream os;
5632 os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5633 std::cerr << os.str();
5634 }
5635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5636 (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5637 "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5638 " = " << numPacketsPerLID.size () << ".");
5639
5640 const map_type& rowMap = * (this->getRowMap ());
5641 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5642 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5643 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5644 "This graph claims to be locally indexed, but its column Map is nullptr. "
5645 "This should never happen. Please report this bug to the Tpetra "
5646 "developers.");
5647
5648 // We may pack different amounts of data for different rows.
5649 constantNumPackets = 0;
5650
5651 // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5652 // it might be now, but we might as well be safe).
5653 size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5654 const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5655
5656 // Count the total number of packets (column indices, in the case
5657 // of a CrsGraph) to pack. While doing so, set
5658 // numPacketsPerLID[i] to the number of entries owned by the
5659 // calling process in (local) row exportLIDs[i] of the graph, that
5660 // the caller wants us to send out.
5661 Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5662 size_t totalNumPackets = 0;
5663 size_t errCount = 0;
5664 // lambdas turn what they capture const, so we can't
5665 // atomic_add(&errCount,1). Instead, we need a View to modify.
5666 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5667 host_device_type;
5668 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5669 constexpr size_t ONE = 1;
5670
5671 execute_sync_host_uvm_access(); // protect host UVM access
5672 Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5673 inputRange,
5674 [=] (const LO& i, size_t& curTotalNumPackets) {
5675 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5676 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5677 Kokkos::atomic_add (&errCountView(), ONE);
5678 numPacketsPerLID_raw[i] = 0;
5679 }
5680 else {
5681 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5682 numPacketsPerLID_raw[i] = numEnt;
5683 curTotalNumPackets += numEnt;
5684 }
5685 },
5686 totalNumPackets);
5687
5688 if (verbose) {
5689 std::ostringstream os;
5690 os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5691 std::cerr << os.str();
5692 }
5693 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5694 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5695 "one or more errors! errCount = " << errCount
5696 << ", totalNumPackets = " << totalNumPackets << ".");
5697 errCount = 0;
5698
5699 // Allocate space for all the column indices to pack.
5700 exports.resize (totalNumPackets);
5701
5702 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5703 (! this->supportsRowViews (), std::logic_error,
5704 "this->supportsRowViews() returns false; this should never happen. "
5705 "Please report this bug to the Tpetra developers.");
5706
5707 // Loop again over the rows to export, and pack rows of indices
5708 // into the output buffer.
5709
5710 if (verbose) {
5711 std::ostringstream os;
5712 os << *prefix << "Pack into exports" << endl;
5713 std::cerr << os.str();
5714 }
5715
5716 // Teuchos::ArrayView may not be thread safe, or may not be
5717 // efficiently thread safe. Better to use the raw pointer.
5718 GO* const exports_raw = exports.getRawPtr ();
5719 errCount = 0;
5720 Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5721 inputRange, [=, &prefix]
5722 (const LO i, size_t& exportsOffset, const bool final) {
5723 const size_t curOffset = exportsOffset;
5724 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5725 const RowInfo rowInfo =
5726 this->getRowInfoFromGlobalRowIndex (gblRow);
5727
5728 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5729 if (rowInfo.localRow == TDO::invalid ()) {
5730 if (verbose) {
5731 std::ostringstream os;
5732 os << *prefix << ": INVALID rowInfo: i=" << i
5733 << ", lclRow=" << exportLIDs_raw[i] << endl;
5734 std::cerr << os.str();
5735 }
5736 Kokkos::atomic_add (&errCountView(), ONE);
5737 }
5738 else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5739 if (verbose) {
5740 std::ostringstream os;
5741 os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5742 << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5743 "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5744 << ") > totalNumPackets (= " << totalNumPackets << ")."
5745 << endl;
5746 std::cerr << os.str();
5747 }
5748 Kokkos::atomic_add (&errCountView(), ONE);
5749 }
5750 else {
5751 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5752 if (this->isLocallyIndexed ()) {
5753 auto lclColInds = getLocalIndsViewHost (rowInfo);
5754 if (final) {
5755 for (LO k = 0; k < numEnt; ++k) {
5756 const LO lclColInd = lclColInds(k);
5757 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5758 // Pack it, even if it's wrong. Let the receiving
5759 // process deal with it. Otherwise, we'll miss out
5760 // on any correct data.
5761 exports_raw[curOffset + k] = gblColInd;
5762 } // for each entry in the row
5763 } // final pass?
5764 exportsOffset = curOffset + numEnt;
5765 }
5766 else if (this->isGloballyIndexed ()) {
5767 auto gblColInds = getGlobalIndsViewHost (rowInfo);
5768 if (final) {
5769 for (LO k = 0; k < numEnt; ++k) {
5770 const GO gblColInd = gblColInds(k);
5771 // Pack it, even if it's wrong. Let the receiving
5772 // process deal with it. Otherwise, we'll miss out
5773 // on any correct data.
5774 exports_raw[curOffset + k] = gblColInd;
5775 } // for each entry in the row
5776 } // final pass?
5777 exportsOffset = curOffset + numEnt;
5778 }
5779 // If neither globally nor locally indexed, then the graph
5780 // has no entries in this row (or indeed, in any row on this
5781 // process) to pack.
5782 }
5783 });
5784
5785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5786 (errCount != 0, std::logic_error, "Packing encountered "
5787 "one or more errors! errCount = " << errCount
5788 << ", totalNumPackets = " << totalNumPackets << ".");
5789
5790 if (verbose) {
5791 std::ostringstream os;
5792 os << *prefix << "Done" << endl;
5793 std::cerr << os.str();
5794 }
5795 }
5796
5797 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5798 void
5800 packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5801 buffer_device_type>& exportLIDs,
5802 Kokkos::DualView<packet_type*,
5803 buffer_device_type>& exports,
5804 Kokkos::DualView<size_t*,
5805 buffer_device_type> numPacketsPerLID,
5806 size_t& constantNumPackets) const
5807 {
5808 using std::endl;
5809 using LO = local_ordinal_type;
5810 using GO = global_ordinal_type;
5811 using host_execution_space = typename Kokkos::View<size_t*,
5812 device_type>::HostMirror::execution_space;
5813 using host_device_type =
5814 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5815 using exports_dv_type =
5816 Kokkos::DualView<packet_type*, buffer_device_type>;
5817 const char tfecfFuncName[] = "packFillActiveNew: ";
5818 const bool verbose = verbose_;
5819
5820 const auto numExportLIDs = exportLIDs.extent (0);
5821 std::unique_ptr<std::string> prefix;
5822 if (verbose) {
5823 prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5824 std::ostringstream os;
5825 os << *prefix << "numExportLIDs: " << numExportLIDs
5826 << ", numPacketsPerLID.extent(0): "
5827 << numPacketsPerLID.extent(0) << endl;
5828 std::cerr << os.str();
5829 }
5830 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5831 (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5832 "exportLIDs.extent(0) = " << numExportLIDs
5833 << " != numPacketsPerLID.extent(0) = "
5834 << numPacketsPerLID.extent (0) << ".");
5835 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5836 auto exportLIDs_h = exportLIDs.view_host ();
5837
5838 const map_type& rowMap = * (this->getRowMap ());
5839 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5841 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5842 "This graph claims to be locally indexed, but its column Map is nullptr. "
5843 "This should never happen. Please report this bug to the Tpetra "
5844 "developers.");
5845
5846 // We may pack different amounts of data for different rows.
5847 constantNumPackets = 0;
5848
5849 numPacketsPerLID.clear_sync_state ();
5850 numPacketsPerLID.modify_host ();
5851 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5852
5853 // Count the total number of packets (column indices, in the case
5854 // of a CrsGraph) to pack. While doing so, set
5855 // numPacketsPerLID[i] to the number of entries owned by the
5856 // calling process in (local) row exportLIDs[i] of the graph, that
5857 // the caller wants us to send out.
5858 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5859 range_type inputRange (0, numExportLIDs);
5860 size_t totalNumPackets = 0;
5861 size_t errCount = 0;
5862 // lambdas turn what they capture const, so we can't
5863 // atomic_add(&errCount,1). Instead, we need a View to modify.
5864 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5865 constexpr size_t ONE = 1;
5866
5867 if (verbose) {
5868 std::ostringstream os;
5869 os << *prefix << "Compute totalNumPackets" << endl;
5870 std::cerr << os.str ();
5871 }
5872
5873 execute_sync_host_uvm_access(); // protect host UVM access
5874 totalNumPackets = 0;
5875 for (size_t i=0; i<numExportLIDs; ++i) {
5876 const LO lclRow = exportLIDs_h[i];
5877 const GO gblRow = rowMap.getGlobalElement (lclRow);
5878 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5879 if (verbose) {
5880 std::ostringstream os;
5881 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5882 << " not in row Map on this process" << endl;
5883 std::cerr << os.str();
5884 }
5885 Kokkos::atomic_add (&errCountView(), ONE);
5886 numPacketsPerLID_h(i) = 0;
5887 }
5888 else {
5889 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5890 numPacketsPerLID_h(i) = numEnt;
5891 totalNumPackets += numEnt;
5892 }
5893 }
5894
5895 if (verbose) {
5896 std::ostringstream os;
5897 os << *prefix << "totalNumPackets: " << totalNumPackets
5898 << ", errCount: " << errCount << endl;
5899 std::cerr << os.str ();
5900 }
5901 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5902 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5903 "one or more errors! totalNumPackets: " << totalNumPackets
5904 << ", errCount: " << errCount << ".");
5905
5906 // Allocate space for all the column indices to pack.
5907 if (size_t(exports.extent (0)) < totalNumPackets) {
5908 // FIXME (mfh 09 Apr 2019) Create without initializing.
5909 exports = exports_dv_type ("exports", totalNumPackets);
5910 }
5911
5912 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5913 (! this->supportsRowViews (), std::logic_error,
5914 "this->supportsRowViews() returns false; this should never happen. "
5915 "Please report this bug to the Tpetra developers.");
5916
5917 // Loop again over the rows to export, and pack rows of indices
5918 // into the output buffer.
5919
5920 if (verbose) {
5921 std::ostringstream os;
5922 os << *prefix << "Pack into exports buffer" << endl;
5923 std::cerr << os.str();
5924 }
5925
5926 exports.clear_sync_state ();
5927 exports.modify_host ();
5928 auto exports_h = exports.view_host ();
5929
5930 errCount = 0;
5931
5932 // The following parallel_scan needs const host access to lclIndsUnpacked_wdv
5933 // (if locally indexed) or gblInds_wdv (if globally indexed).
5934 if(isLocallyIndexed())
5935 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5936 else if(isGloballyIndexed())
5937 gblInds_wdv.getHostView(Access::ReadOnly);
5938
5940 Kokkos::parallel_scan
5941 ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5942 inputRange, [=, &prefix]
5943 (const LO i, size_t& exportsOffset, const bool final) {
5944 const size_t curOffset = exportsOffset;
5945 const LO lclRow = exportLIDs_h(i);
5946 const GO gblRow = rowMap.getGlobalElement (lclRow);
5947 if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5948 if (verbose) {
5949 std::ostringstream os;
5950 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5951 << " not in row Map on this process" << endl;
5952 std::cerr << os.str();
5953 }
5954 Kokkos::atomic_add (&errCountView(), ONE);
5955 return;
5956 }
5957
5958 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5959 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5960 if (verbose) {
5961 std::ostringstream os;
5962 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5963 << ", gblRow=" << gblRow << ": invalid rowInfo"
5964 << endl;
5965 std::cerr << os.str();
5966 }
5967 Kokkos::atomic_add (&errCountView(), ONE);
5968 return;
5969 }
5970
5971 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5972 if (verbose) {
5973 std::ostringstream os;
5974 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5975 << ", gblRow=" << gblRow << ", curOffset (= "
5976 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5977 << ") > totalNumPackets (= " << totalNumPackets
5978 << ")." << endl;
5979 std::cerr << os.str();
5980 }
5981 Kokkos::atomic_add (&errCountView(), ONE);
5982 return;
5983 }
5984
5985 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5986 if (this->isLocallyIndexed ()) {
5987 auto lclColInds = getLocalIndsViewHost(rowInfo);
5988 if (final) {
5989 for (LO k = 0; k < numEnt; ++k) {
5990 const LO lclColInd = lclColInds(k);
5991 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5992 // Pack it, even if it's wrong. Let the receiving
5993 // process deal with it. Otherwise, we'll miss out
5994 // on any correct data.
5995 exports_h(curOffset + k) = gblColInd;
5996 } // for each entry in the row
5997 } // final pass?
5998 exportsOffset = curOffset + numEnt;
5999 }
6000 else if (this->isGloballyIndexed ()) {
6001 auto gblColInds = getGlobalIndsViewHost(rowInfo);
6002 if (final) {
6003 for (LO k = 0; k < numEnt; ++k) {
6004 const GO gblColInd = gblColInds(k);
6005 // Pack it, even if it's wrong. Let the receiving
6006 // process deal with it. Otherwise, we'll miss out
6007 // on any correct data.
6008 exports_h(curOffset + k) = gblColInd;
6009 } // for each entry in the row
6010 } // final pass?
6011 exportsOffset = curOffset + numEnt;
6012 }
6013 // If neither globally nor locally indexed, then the graph
6014 // has no entries in this row (or indeed, in any row on this
6015 // process) to pack.
6016 });
6018
6019 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6020 // (errCount != 0, std::logic_error, "Packing encountered "
6021 // "one or more errors! errCount = " << errCount
6022 // << ", totalNumPackets = " << totalNumPackets << ".");
6023
6024 if (verbose) {
6025 std::ostringstream os;
6026 os << *prefix << "errCount=" << errCount << "; Done" << endl;
6027 std::cerr << os.str();
6028 }
6029 }
6030
6031 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6032 void
6035 (const Kokkos::DualView<const local_ordinal_type*,
6036 buffer_device_type>& importLIDs,
6037 Kokkos::DualView<packet_type*,
6038 buffer_device_type> imports,
6039 Kokkos::DualView<size_t*,
6040 buffer_device_type> numPacketsPerLID,
6041 const size_t /* constantNumPackets */,
6042 const CombineMode /* combineMode */ )
6043 {
6045 using std::endl;
6046 using LO = local_ordinal_type;
6047 using GO = global_ordinal_type;
6048 const char tfecfFuncName[] = "unpackAndCombine";
6049
6050 ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6051 const bool verbose = verbose_;
6052
6053 std::unique_ptr<std::string> prefix;
6054 if (verbose) {
6055 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6056 std::ostringstream os;
6057 os << *prefix << "Start" << endl;
6058 std::cerr << os.str ();
6059 }
6060 {
6061 auto padding = computeCrsPaddingForImports(
6062 importLIDs, imports, numPacketsPerLID, verbose);
6063 applyCrsPadding(*padding, verbose);
6064 if (verbose) {
6065 std::ostringstream os;
6066 os << *prefix << "Done computing & applying padding" << endl;
6067 std::cerr << os.str ();
6068 }
6069 }
6070
6071 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6072 // reasonable meaning, whether or not the matrix is fill complete.
6073 // It's just more work to implement.
6074
6075 // We are not checking the value of the CombineMode input
6076 // argument. For CrsGraph, we only support import/export
6077 // operations if fillComplete has not yet been called. Any
6078 // incoming column-indices are inserted into the target graph. In
6079 // this context, CombineMode values of ADD vs INSERT are
6080 // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6081 // duplicate column-index is inserted, it will be compressed out
6082 // when fillComplete is called.
6083 //
6084 // Note: I think REPLACE means that an existing row is replaced by
6085 // the imported row, i.e., the existing indices are cleared. CGB,
6086 // 6/17/2010
6087
6088 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6089 (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6090 std::runtime_error, ": importLIDs.extent(0) = "
6091 << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6092 << numPacketsPerLID.extent (0) << ".");
6093 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6094 (isFillComplete (), std::runtime_error,
6095 ": Import or Export operations are not allowed on a target "
6096 "CrsGraph that is fillComplete.");
6097
6098 const size_t numImportLIDs(importLIDs.extent(0));
6099 if (numPacketsPerLID.need_sync_host()) {
6100 numPacketsPerLID.sync_host();
6101 }
6102 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6103 if (imports.need_sync_host()) {
6104 imports.sync_host();
6105 }
6106 auto imports_h = imports.view_host();
6107 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6108 auto importLIDs_h = importLIDs.view_host();
6109
6110 // If we're inserting in local indices, let's pre-allocate
6111 Teuchos::Array<LO> lclColInds;
6112 if (isLocallyIndexed()) {
6113 if (verbose) {
6114 std::ostringstream os;
6115 os << *prefix << "Preallocate local indices scratch" << endl;
6116 std::cerr << os.str();
6117 }
6118 size_t maxNumInserts = 0;
6119 for (size_t i = 0; i < numImportLIDs; ++i) {
6120 maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6121 }
6122 if (verbose) {
6123 std::ostringstream os;
6124 os << *prefix << "Local indices scratch size: "
6125 << maxNumInserts << endl;
6126 std::cerr << os.str();
6127 }
6128 lclColInds.resize (maxNumInserts);
6129 }
6130 else {
6131 if (verbose) {
6132 std::ostringstream os;
6133 os << *prefix;
6134 if (isGloballyIndexed()) {
6135 os << "Graph is globally indexed";
6136 }
6137 else {
6138 os << "Graph is neither locally nor globally indexed";
6139 }
6140 os << endl;
6141 std::cerr << os.str();
6142 }
6143 }
6144
6145 TEUCHOS_ASSERT( ! rowMap_.is_null() );
6146 const map_type& rowMap = *rowMap_;
6147
6148 try {
6149 size_t importsOffset = 0;
6150 for (size_t i = 0; i < numImportLIDs; ++i) {
6151 if (verbose) {
6152 std::ostringstream os;
6153 os << *prefix << "i=" << i << ", numImportLIDs="
6154 << numImportLIDs << endl;
6155 std::cerr << os.str();
6156 }
6157 // We can only unpack into owned rows, since we only have
6158 // local row indices.
6159 const LO lclRow = importLIDs_h[i];
6160 const GO gblRow = rowMap.getGlobalElement(lclRow);
6161 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6162 (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6163 std::logic_error, "importLIDs[i=" << i << "]="
6164 << lclRow << " is not in the row Map on the calling "
6165 "process.");
6166 const LO numEnt = numPacketsPerLID_h[i];
6167 const GO* const gblColInds = (numEnt == 0) ? nullptr :
6168 imports_h.data() + importsOffset;
6169 if (! isLocallyIndexed()) {
6170 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6171 }
6172 else {
6173 // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6174 // column Map filtering.
6175 for (LO j = 0; j < numEnt; j++) {
6176 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6177 }
6178 insertLocalIndices(lclRow, numEnt, lclColInds.data());
6179 }
6180 importsOffset += numEnt;
6181 }
6182 }
6183 catch (std::exception& e) {
6184 TEUCHOS_TEST_FOR_EXCEPTION
6185 (true, std::runtime_error,
6186 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6187 "exception: " << endl << e.what());
6188 }
6189
6190 if (verbose) {
6191 std::ostringstream os;
6192 os << *prefix << "Done" << endl;
6193 std::cerr << os.str();
6194 }
6195 }
6196
6197 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6198 void
6200 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6201 {
6202 using Teuchos::Comm;
6203 using Teuchos::null;
6204 using Teuchos::ParameterList;
6205 using Teuchos::RCP;
6206
6207 // We'll set all the state "transactionally," so that this method
6208 // satisfies the strong exception guarantee. This object's state
6209 // won't be modified until the end of this method.
6210 RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6211 RCP<import_type> importer;
6212 RCP<export_type> exporter;
6213
6214 rowMap = newMap;
6215 RCP<const Comm<int> > newComm =
6216 (newMap.is_null ()) ? null : newMap->getComm ();
6217
6218 if (! domainMap_.is_null ()) {
6219 if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6220 // Common case: original domain and row Maps are identical.
6221 // In that case, we need only replace the original domain Map
6222 // with the new Map. This ensures that the new domain and row
6223 // Maps _stay_ identical.
6224 domainMap = newMap;
6225 } else {
6226 domainMap = domainMap_->replaceCommWithSubset (newComm);
6227 }
6228 }
6229 if (! rangeMap_.is_null ()) {
6230 if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6231 // Common case: original range and row Maps are identical. In
6232 // that case, we need only replace the original range Map with
6233 // the new Map. This ensures that the new range and row Maps
6234 // _stay_ identical.
6235 rangeMap = newMap;
6236 } else {
6237 rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6238 }
6239 }
6240 if (! colMap_.is_null ()) {
6241 colMap = colMap_->replaceCommWithSubset (newComm);
6242 }
6243
6244 // (Re)create the Export and / or Import if necessary.
6245 if (! newComm.is_null ()) {
6246 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6247 //
6248 // The operations below are collective on the new communicator.
6249 //
6250 // (Re)create the Export object if necessary. If I haven't
6251 // called fillComplete yet, I don't have a rangeMap, so I must
6252 // first check if the _original_ rangeMap is not null. Ditto
6253 // for the Import object and the domain Map.
6254 if (! rangeMap_.is_null () &&
6255 rangeMap != rowMap &&
6256 ! rangeMap->isSameAs (*rowMap)) {
6257 if (params.is_null () || ! params->isSublist ("Export")) {
6258 exporter = rcp (new export_type (rowMap, rangeMap));
6259 }
6260 else {
6261 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6262 exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6263 }
6264 }
6265 // (Re)create the Import object if necessary.
6266 if (! domainMap_.is_null () &&
6267 domainMap != colMap &&
6268 ! domainMap->isSameAs (*colMap)) {
6269 if (params.is_null () || ! params->isSublist ("Import")) {
6270 importer = rcp (new import_type (domainMap, colMap));
6271 } else {
6272 RCP<ParameterList> importSublist = sublist (params, "Import", true);
6273 importer = rcp (new import_type (domainMap, colMap, importSublist));
6274 }
6275 }
6276 } // if newComm is not null
6277
6278 // Defer side effects until the end. If no destructors throw
6279 // exceptions (they shouldn't anyway), then this method satisfies
6280 // the strong exception guarantee.
6281 exporter_ = exporter;
6282 importer_ = importer;
6283 rowMap_ = rowMap;
6284 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6285 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6286 // the same object. We might want to get rid of this redundant
6287 // pointer sometime, but for now, we'll leave it alone and just
6288 // set map_ to the same object.
6289 this->map_ = rowMap;
6290 domainMap_ = domainMap;
6291 rangeMap_ = rangeMap;
6292 colMap_ = colMap;
6293 }
6294
6295 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6296 void
6298 getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6299 {
6300 using std::endl;
6301 using LO = LocalOrdinal;
6302 using GO = GlobalOrdinal;
6303 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6304 const bool verbose = verbose_;
6305
6306 std::unique_ptr<std::string> prefix;
6307 if (verbose) {
6308 prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6309 std::ostringstream os;
6310 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6311 << endl;
6312 std::cerr << os.str();
6313 }
6314
6315 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6316 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6317 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
6318 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6319 (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6320 std::invalid_argument, "offsets.extent(0) = " <<
6321 offsets.extent (0) << " < getLocalNumRows() = " << lclNumRows << ".");
6322
6323 const map_type& rowMap = * (this->getRowMap ());
6324 const map_type& colMap = * (this->getColMap ());
6325
6326 // We only use these in debug mode, but since debug mode is a
6327 // run-time option, they need to exist here. That's why we create
6328 // the vector with explicit size zero, to avoid overhead if debug
6329 // mode is off.
6330 bool allRowMapDiagEntriesInColMap = true;
6331 bool allDiagEntriesFound = true;
6332 bool allOffsetsCorrect = true;
6333 bool noOtherWeirdness = true;
6334 using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6335 wrong_offsets_type wrongOffsets(0);
6336
6337 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6338 // the subset of Map functionality that we need below.
6339 auto lclRowMap = rowMap.getLocalMap ();
6340 auto lclColMap = colMap.getLocalMap ();
6341
6342 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6343 // setup, at least on the host. For CUDA, we have to use LocalMap
6344 // (that comes from each of the two Maps).
6345
6346 const bool sorted = this->isSorted ();
6347 if (isFillComplete ()) {
6348 auto lclGraph = this->getLocalGraphDevice ();
6349 ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6350 lclGraph.row_map,
6351 lclGraph.entries, sorted);
6352 }
6353 else {
6354 // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6355 // since the graph is not fill complete. The previous version
6356 // of this code assumed UVM; this version does not.
6357 auto offsets_h = Kokkos::create_mirror_view (offsets);
6358
6359 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6360 // Find the diagonal entry. Since the row Map and column Map
6361 // may differ, we have to compare global row and column
6362 // indices, not local.
6363 const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6364 const GO gblColInd = gblRowInd;
6365 const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6366
6367 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6368 allRowMapDiagEntriesInColMap = false;
6369 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6370 }
6371 else {
6372 const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6373 if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6374 rowInfo.numEntries > 0) {
6375
6376 auto colInds = this->getLocalIndsViewHost (rowInfo);
6377 const size_t hint = 0; // not needed for this algorithm
6378 const size_t offset =
6379 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6380 lclColInd, hint, sorted);
6381 offsets_h(lclRowInd) = offset;
6382
6383 if (debug_) {
6384 // Now that we have what we think is an offset, make sure
6385 // that it really does point to the diagonal entry. Offsets
6386 // are _relative_ to each row, not absolute (for the whole
6387 // (local) graph).
6388 typename local_inds_dualv_type::t_host::const_type lclColInds;
6389 try {
6390 lclColInds = this->getLocalIndsViewHost (rowInfo);
6391 }
6392 catch (...) {
6393 noOtherWeirdness = false;
6394 }
6395 // Don't continue with error checking if the above failed.
6396 if (noOtherWeirdness) {
6397 const size_t numEnt = lclColInds.extent (0);
6398 if (offset >= numEnt) {
6399 // Offsets are relative to each row, so this means that
6400 // the offset is out of bounds.
6401 allOffsetsCorrect = false;
6402 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6403 } else {
6404 const LO actualLclColInd = lclColInds(offset);
6405 const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6406 if (actualGblColInd != gblColInd) {
6407 allOffsetsCorrect = false;
6408 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6409 }
6410 }
6411 }
6412 } // debug_
6413 }
6414 else { // either row is empty, or something went wrong w/ getRowInfo()
6415 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6416 allDiagEntriesFound = false;
6417 }
6418 } // whether lclColInd is a valid local column index
6419 } // for each local row
6420 // DEEP_COPY REVIEW - NOT TESTED
6421 Kokkos::deep_copy (offsets, offsets_h);
6422 } // whether the graph is fill complete
6423
6424 if (verbose && wrongOffsets.size () != 0) {
6425 std::ostringstream os;
6426 os << *prefix << "Wrong offsets: [";
6427 for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6428 os << "(" << wrongOffsets[k].first << ","
6429 << wrongOffsets[k].second << ")";
6430 if (k + 1 < wrongOffsets.size ()) {
6431 os << ", ";
6432 }
6433 }
6434 os << "]" << endl;
6435 std::cerr << os.str();
6436 }
6437
6438 if (debug_) {
6439 using Teuchos::reduceAll;
6440 using std::endl;
6441 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6442 const bool localSuccess =
6443 allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6444 const int numResults = 5;
6445 int lclResults[5];
6446 lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6447 lclResults[1] = allDiagEntriesFound ? 1 : 0;
6448 lclResults[2] = allOffsetsCorrect ? 1 : 0;
6449 lclResults[3] = noOtherWeirdness ? 1 : 0;
6450 // min-all-reduce will compute least rank of all the processes
6451 // that didn't succeed.
6452 lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6453
6454 int gblResults[5];
6455 gblResults[0] = 0;
6456 gblResults[1] = 0;
6457 gblResults[2] = 0;
6458 gblResults[3] = 0;
6459 gblResults[4] = 0;
6460 reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6461 numResults, lclResults, gblResults);
6462
6463 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6464 || gblResults[3] != 1) {
6465 std::ostringstream os; // build error message
6466 os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6467 "possibly among others): " << endl;
6468 if (gblResults[0] == 0) {
6469 os << " - The column Map does not contain at least one diagonal entry "
6470 "of the graph." << endl;
6471 }
6472 if (gblResults[1] == 0) {
6473 os << " - On one or more processes, some row does not contain a "
6474 "diagonal entry." << endl;
6475 }
6476 if (gblResults[2] == 0) {
6477 os << " - On one or more processes, some offsets are incorrect."
6478 << endl;
6479 }
6480 if (gblResults[3] == 0) {
6481 os << " - One or more processes had some other error."
6482 << endl;
6483 }
6484 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6485 }
6486 } // debug_
6487 }
6488
6489 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6490 void
6492 getLocalOffRankOffsets (offset_device_view_type& offsets) const
6493 {
6494 using std::endl;
6495 const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6496 const bool verbose = verbose_;
6497
6498 std::unique_ptr<std::string> prefix;
6499 if (verbose) {
6500 prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6501 std::ostringstream os;
6502 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6503 << endl;
6504 std::cerr << os.str();
6505 }
6506
6507 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6508 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6509 // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6510
6511 const size_t lclNumRows = this->getLocalNumRows ();
6512
6513 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6514 offsets = k_offRankOffsets_;
6515 return;
6516 }
6517 haveLocalOffRankOffsets_ = false;
6518
6519 const map_type& colMap = * (this->getColMap ());
6520 const map_type& domMap = * (this->getDomainMap ());
6521
6522 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6523 // the subset of Map functionality that we need below.
6524 auto lclColMap = colMap.getLocalMap ();
6525 auto lclDomMap = domMap.getLocalMap ();
6526
6527 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6528 // setup, at least on the host. For CUDA, we have to use LocalMap
6529 // (that comes from each of the two Maps).
6530
6531 TEUCHOS_ASSERT(this->isSorted ());
6532 if (isFillComplete ()) {
6533 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6534 auto lclGraph = this->getLocalGraphDevice ();
6535 ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6536 lclColMap, lclDomMap,
6537 lclGraph);
6538 offsets = k_offRankOffsets_;
6539 haveLocalOffRankOffsets_ = true;
6540 } else {
6541 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6542 (true, std::logic_error, "Can't get off-rank offsets for non-fill-complete graph");
6543 }
6544 }
6545
6546 namespace { // (anonymous)
6547
6548 // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6549 // below). The point is to avoid the deep copy between the input
6550 // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6551 // can't use UVM to avoid the deep copy with CUDA, because the
6552 // ArrayRCP is a host pointer, while the input to the graph's
6553 // getLocalDiagOffsets method is a device pointer. Assigning a
6554 // host pointer to a device pointer is incorrect unless the host
6555 // pointer points to host pinned memory. The goal is to get rid
6556 // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6557 // copy for backwards compatibility.
6558 //
6559 // We have to use template magic because
6560 // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6561 // if device_type::memory_space is not Kokkos::HostSpace (as is
6562 // the case with CUDA).
6563
6564 template<class DeviceType,
6565 const bool memSpaceIsHostSpace =
6566 std::is_same<typename DeviceType::memory_space,
6567 Kokkos::HostSpace>::value>
6568 struct HelpGetLocalDiagOffsets {};
6569
6570 template<class DeviceType>
6571 struct HelpGetLocalDiagOffsets<DeviceType, true> {
6572 typedef DeviceType device_type;
6573 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6574 Kokkos::MemoryUnmanaged> device_offsets_type;
6575 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6576 Kokkos::MemoryUnmanaged> host_offsets_type;
6577
6578 static device_offsets_type
6579 getDeviceOffsets (const host_offsets_type& hostOffsets)
6580 {
6581 // Host and device are the same; no need to allocate a
6582 // temporary device View.
6583 return hostOffsets;
6584 }
6585
6586 static void
6587 copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6588 const device_offsets_type& /* deviceOffsets */)
6589 { /* copy back not needed; host and device are the same */ }
6590 };
6591
6592 template<class DeviceType>
6593 struct HelpGetLocalDiagOffsets<DeviceType, false> {
6594 typedef DeviceType device_type;
6595 // We have to do a deep copy, since host memory space != device
6596 // memory space. Thus, the device View is managed (we need to
6597 // allocate a temporary device View).
6598 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6599 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6600 Kokkos::MemoryUnmanaged> host_offsets_type;
6601
6602 static device_offsets_type
6603 getDeviceOffsets (const host_offsets_type& hostOffsets)
6604 {
6605 // Host memory space != device memory space, so we must
6606 // allocate a temporary device View for the graph.
6607 return device_offsets_type ("offsets", hostOffsets.extent (0));
6608 }
6609
6610 static void
6611 copyBackIfNeeded (const host_offsets_type& hostOffsets,
6612 const device_offsets_type& deviceOffsets)
6613 {
6614 // DEEP_COPY REVIEW - NOT TESTED
6615 Kokkos::deep_copy (hostOffsets, deviceOffsets);
6616 }
6617 };
6618 } // namespace (anonymous)
6619
6620
6621 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6622 void
6624 getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6625 {
6626 typedef LocalOrdinal LO;
6627 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6629 (! this->hasColMap (), std::runtime_error,
6630 "The graph does not yet have a column Map.");
6631 const LO myNumRows = static_cast<LO> (this->getLocalNumRows ());
6632 if (static_cast<LO> (offsets.size ()) != myNumRows) {
6633 // NOTE (mfh 21 Jan 2016) This means that the method does not
6634 // satisfy the strong exception guarantee (no side effects
6635 // unless successful).
6636 offsets.resize (myNumRows);
6637 }
6638
6639 // mfh 21 Jan 2016: This method unfortunately takes a
6640 // Teuchos::ArrayRCP, which is host memory. The graph wants a
6641 // device pointer. We can't access host memory from the device;
6642 // that's the wrong direction for UVM. (It's the right direction
6643 // for inefficient host pinned memory, but we don't want to use
6644 // that here.) Thus, if device memory space != host memory space,
6645 // we allocate and use a temporary device View to get the offsets.
6646 // If the two spaces are equal, the template magic makes the deep
6647 // copy go away.
6648 typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6649 typedef typename helper_type::host_offsets_type host_offsets_type;
6650 // Unmanaged host View that views the output array.
6651 host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6652 // Allocate temp device View if host != device, else reuse host array.
6653 auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6654 // NOT recursion; this calls the overload that takes a device View.
6655 this->getLocalDiagOffsets (deviceOffsets);
6656 helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6657 }
6658
6659 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6660 bool
6665
6666 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6667 void
6670 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6671 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6672 const Teuchos::RCP<const map_type>& domainMap,
6673 const Teuchos::RCP<const map_type>& rangeMap,
6674 const Teuchos::RCP<Teuchos::ParameterList>& params) const
6675 {
6680 using Teuchos::ArrayRCP;
6681 using Teuchos::ArrayView;
6682 using Teuchos::Comm;
6683 using Teuchos::ParameterList;
6684 using Teuchos::rcp;
6685 using Teuchos::RCP;
6686#ifdef HAVE_TPETRA_MMM_TIMINGS
6687 using std::string;
6688 using Teuchos::TimeMonitor;
6689#endif
6690
6691 using LO = LocalOrdinal;
6692 using GO = GlobalOrdinal;
6693 using NT = node_type;
6694 using this_CRS_type = CrsGraph<LO, GO, NT>;
6695 using ivector_type = Vector<int, LO, GO, NT>;
6696
6697 const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6698
6699#ifdef HAVE_TPETRA_MMM_TIMINGS
6700 string label;
6701 if(!params.is_null()) label = params->get("Timer Label", label);
6702 string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6703 RCP<TimeMonitor> MM =
6704 rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6705#endif
6706
6707 // Make sure that the input argument rowTransfer is either an
6708 // Import or an Export. Import and Export are the only two
6709 // subclasses of Transfer that we defined, but users might
6710 // (unwisely, for now at least) decide to implement their own
6711 // subclasses. Exclude this possibility.
6712 const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6713 const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6714 TEUCHOS_TEST_FOR_EXCEPTION(
6715 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6716 prefix << "The 'rowTransfer' input argument must be either an Import or "
6717 "an Export, and its template parameters must match the corresponding "
6718 "template parameters of the CrsGraph.");
6719
6720 // Make sure that the input argument domainTransfer is either an
6721 // Import or an Export. Import and Export are the only two
6722 // subclasses of Transfer that we defined, but users might
6723 // (unwisely, for now at least) decide to implement their own
6724 // subclasses. Exclude this possibility.
6725 Teuchos::RCP<const import_type> xferDomainAsImport =
6726 Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6727 Teuchos::RCP<const export_type> xferDomainAsExport =
6728 Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6729
6730 if(! domainTransfer.is_null()) {
6731
6732 TEUCHOS_TEST_FOR_EXCEPTION(
6733 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6734 prefix << "The 'domainTransfer' input argument must be either an "
6735 "Import or an Export, and its template parameters must match the "
6736 "corresponding template parameters of the CrsGraph.");
6737
6738 TEUCHOS_TEST_FOR_EXCEPTION(
6739 ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
6740 (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
6741 ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6742 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6743 "must be of the same type (either Import or Export).");
6744
6745 TEUCHOS_TEST_FOR_EXCEPTION(
6746 ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
6747 (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
6748 ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6749 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6750 "must be of the same type (either Import or Export).");
6751
6752 } // domainTransfer != null
6753
6754
6755 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6756 // if the source Map is not distributed but the target Map is?
6757 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6758
6759 //
6760 // Get the caller's parameters
6761 //
6762
6763 bool reverseMode = false; // Are we in reverse mode?
6764 bool restrictComm = false; // Do we need to restrict the communicator?
6765 RCP<ParameterList> graphparams; // parameters for the destination graph
6766 if (! params.is_null()) {
6767 reverseMode = params->get("Reverse Mode", reverseMode);
6768 restrictComm = params->get("Restrict Communicator", restrictComm);
6769 graphparams = sublist(params, "CrsGraph");
6770 }
6771
6772 // Get the new domain and range Maps. We need some of them for error
6773 // checking, now that we have the reverseMode parameter.
6774 RCP<const map_type> MyRowMap = reverseMode ?
6775 rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6776 RCP<const map_type> MyColMap; // create this below
6777 RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6778 RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
6779 RCP<const map_type> BaseRowMap = MyRowMap;
6780 RCP<const map_type> BaseDomainMap = MyDomainMap;
6781
6782 // If the user gave us a nonnull destGraph, then check whether it's
6783 // "pristine." That means that it has no entries.
6784 //
6785 // FIXME (mfh 15 May 2014) If this is not true on all processes,
6786 // then this exception test may hang. It would be better to
6787 // forward an error flag to the next communication phase.
6788 if (! destGraph.is_null()) {
6789 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6790 // whether a graph or matrix has no entries on the calling
6791 // process, is that it is neither locally nor globally indexed.
6792 // This may change eventually with the Kokkos refactor version
6793 // of Tpetra, so it would be better just to check the quantity
6794 // of interest directly. Note that with the Kokkos refactor
6795 // version of Tpetra, asking for the total number of entries in
6796 // a graph or matrix that is not fill complete might require
6797 // computation (kernel launch), since it is not thread scalable
6798 // to update a count every time an entry is inserted.
6799 const bool NewFlag =
6800 ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6801 TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6802 prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6803 "if its graph is empty (neither locally nor globally indexed).");
6804
6805 // FIXME (mfh 15 May 2014) At some point, we want to change
6806 // graphs and matrices so that their DistObject Map
6807 // (this->getMap()) may differ from their row Map. This will
6808 // make redistribution for 2-D distributions more efficient. I
6809 // hesitate to change this check, because I'm not sure how much
6810 // the code here depends on getMap() and getRowMap() being the
6811 // same.
6812 TEUCHOS_TEST_FOR_EXCEPTION(
6813 ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6814 prefix << "The (row) Map of the input argument 'destGraph' is not the "
6815 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6816
6817 TEUCHOS_TEST_FOR_EXCEPTION(
6818 ! destGraph->checkSizes(*this), std::invalid_argument,
6819 prefix << "You provided a nonnull destination graph, but checkSizes() "
6820 "indicates that it is not a legal legal target for redistribution from "
6821 "the source graph (*this). This may mean that they do not have the "
6822 "same dimensions.");
6823 }
6824
6825 // If forward mode (the default), then *this's (row) Map must be
6826 // the same as the source Map of the Transfer. If reverse mode,
6827 // then *this's (row) Map must be the same as the target Map of
6828 // the Transfer.
6829 //
6830 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6831 // and matrices so that their DistObject Map (this->getMap()) may
6832 // differ from their row Map. This will make redistribution for
6833 // 2-D distributions more efficient. I hesitate to change this
6834 // check, because I'm not sure how much the code here depends on
6835 // getMap() and getRowMap() being the same.
6836 TEUCHOS_TEST_FOR_EXCEPTION(
6837 ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6838 std::invalid_argument, prefix <<
6839 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6840
6841 TEUCHOS_TEST_FOR_EXCEPTION(
6842 ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6843 std::invalid_argument, prefix <<
6844 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6845
6846 // checks for domainTransfer
6847 TEUCHOS_TEST_FOR_EXCEPTION(
6848 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6849 std::invalid_argument,
6850 prefix << "The target map of the 'domainTransfer' input argument must be "
6851 "the same as the rebalanced domain map 'domainMap'");
6852
6853 TEUCHOS_TEST_FOR_EXCEPTION(
6854 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6855 std::invalid_argument,
6856 prefix << "The source map of the 'domainTransfer' input argument must be "
6857 "the same as the rebalanced domain map 'domainMap'");
6858
6859 // The basic algorithm here is:
6860 //
6861 // 1. Call the moral equivalent of "Distor.do" to handle the import.
6862 // 2. Copy all the Imported and Copy/Permuted data into the raw
6863 // CrsGraph pointers, still using GIDs.
6864 // 3. Call an optimized version of MakeColMap that avoids the
6865 // Directory lookups (since the importer knows who owns all the
6866 // GIDs) AND reindexes to LIDs.
6867 // 4. Call expertStaticFillComplete()
6868
6869 // Get information from the Importer
6870 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6871 ArrayView<const LO> ExportLIDs = reverseMode ?
6872 rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6873 ArrayView<const LO> RemoteLIDs = reverseMode ?
6874 rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6875 ArrayView<const LO> PermuteToLIDs = reverseMode ?
6876 rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6877 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6878 rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6879 Distributor& Distor = rowTransfer.getDistributor();
6880
6881 // Owning PIDs
6882 Teuchos::Array<int> SourcePids;
6883 Teuchos::Array<int> TargetPids;
6884 int MyPID = getComm()->getRank();
6885
6886 // Temp variables for sub-communicators
6887 RCP<const map_type> ReducedRowMap, ReducedColMap,
6888 ReducedDomainMap, ReducedRangeMap;
6889 RCP<const Comm<int> > ReducedComm;
6890
6891 // If the user gave us a null destGraph, then construct the new
6892 // destination graph. We will replace its column Map later.
6893 if (destGraph.is_null()) {
6894 destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6895 }
6896
6897 /***************************************************/
6898 /***** 1) First communicator restriction phase ****/
6899 /***************************************************/
6900 if (restrictComm) {
6901 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6902 ReducedComm = ReducedRowMap.is_null() ?
6903 Teuchos::null :
6904 ReducedRowMap->getComm();
6905 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6906
6907 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
6908 ReducedRowMap :
6909 MyDomainMap->replaceCommWithSubset(ReducedComm);
6910 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
6911 ReducedRowMap :
6912 MyRangeMap->replaceCommWithSubset(ReducedComm);
6913
6914 // Reset the "my" maps
6915 MyRowMap = ReducedRowMap;
6916 MyDomainMap = ReducedDomainMap;
6917 MyRangeMap = ReducedRangeMap;
6918
6919 // Update my PID, if we've restricted the communicator
6920 if (! ReducedComm.is_null()) {
6921 MyPID = ReducedComm->getRank();
6922 }
6923 else {
6924 MyPID = -2; // For debugging
6925 }
6926 }
6927 else {
6928 ReducedComm = MyRowMap->getComm();
6929 }
6930
6931 /***************************************************/
6932 /***** 2) From Tpera::DistObject::doTransfer() ****/
6933 /***************************************************/
6934#ifdef HAVE_TPETRA_MMM_TIMINGS
6935 MM = Teuchos::null;
6936 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
6937#endif
6938 // Get the owning PIDs
6939 RCP<const import_type> MyImporter = getImporter();
6940
6941 // check whether domain maps of source graph and base domain map is the same
6942 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6943
6944 if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
6945 // Same domain map as source graph
6946 //
6947 // NOTE: This won't work for restrictComm (because the Import
6948 // doesn't know the restricted PIDs), though writing an
6949 // optimized version for that case would be easy (Import an
6950 // IntVector of the new PIDs). Might want to add this later.
6951 Import_Util::getPids(*MyImporter, SourcePids, false);
6952 }
6953 else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
6954 // Same domain map as source graph (restricted communicator)
6955 // We need one import from the domain to the column map
6956 ivector_type SourceDomain_pids(getDomainMap(),true);
6957 ivector_type SourceCol_pids(getColMap());
6958 // SourceDomain_pids contains the restricted pids
6959 SourceDomain_pids.putScalar(MyPID);
6960
6961 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6962 SourcePids.resize(getColMap()->getLocalNumElements());
6963 SourceCol_pids.get1dCopy(SourcePids());
6964 }
6965 else if (MyImporter.is_null() && bSameDomainMap) {
6966 // Graph has no off-process entries
6967 SourcePids.resize(getColMap()->getLocalNumElements());
6968 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6969 }
6970 else if ( ! MyImporter.is_null() &&
6971 ! domainTransfer.is_null() ) {
6972 // general implementation for rectangular matrices with
6973 // domain map different than SourceGraph domain map.
6974 // User has to provide a DomainTransfer object. We need
6975 // to communications (import/export)
6976
6977 // TargetDomain_pids lives on the rebalanced new domain map
6978 ivector_type TargetDomain_pids(domainMap);
6979 TargetDomain_pids.putScalar(MyPID);
6980
6981 // SourceDomain_pids lives on the non-rebalanced old domain map
6982 ivector_type SourceDomain_pids(getDomainMap());
6983
6984 // SourceCol_pids lives on the non-rebalanced old column map
6985 ivector_type SourceCol_pids(getColMap());
6986
6987 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6988 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6989 }
6990 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6991 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6992 }
6993 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6994 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6995 }
6996 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6997 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6998 }
6999 else {
7000 TEUCHOS_TEST_FOR_EXCEPTION(
7001 true, std::logic_error,
7002 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7003 }
7004 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7005 SourcePids.resize(getColMap()->getLocalNumElements());
7006 SourceCol_pids.get1dCopy(SourcePids());
7007 }
7008 else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7009 getDomainMap()->isSameAs(*getRowMap())) {
7010 // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7011 ivector_type TargetRow_pids(domainMap);
7012 ivector_type SourceRow_pids(getRowMap());
7013 ivector_type SourceCol_pids(getColMap());
7014
7015 TargetRow_pids.putScalar(MyPID);
7016 if (! reverseMode && xferAsImport != nullptr) {
7017 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7018 }
7019 else if (reverseMode && xferAsExport != nullptr) {
7020 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7021 }
7022 else if (! reverseMode && xferAsExport != nullptr) {
7023 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7024 }
7025 else if (reverseMode && xferAsImport != nullptr) {
7026 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7027 }
7028 else {
7029 TEUCHOS_TEST_FOR_EXCEPTION(
7030 true, std::logic_error,
7031 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7032 }
7033 SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7034 SourcePids.resize(getColMap()->getLocalNumElements());
7035 SourceCol_pids.get1dCopy(SourcePids());
7036 }
7037 else {
7038 TEUCHOS_TEST_FOR_EXCEPTION(
7039 true, std::invalid_argument,
7040 prefix << "This method only allows either domainMap == getDomainMap(), "
7041 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7042 }
7043
7044 // Tpetra-specific stuff
7045 size_t constantNumPackets = destGraph->constantNumberOfPackets();
7046 if (constantNumPackets == 0) {
7047 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7048 RemoteLIDs.size());
7049 }
7050 else {
7051 // There are a constant number of packets per element. We
7052 // already know (from the number of "remote" (incoming)
7053 // elements) how many incoming elements we expect, so we can
7054 // resize the buffer accordingly.
7055 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7056 destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7057 }
7058
7059 {
7060 // packAndPrepare* methods modify numExportPacketsPerLID_.
7061 destGraph->numExportPacketsPerLID_.modify_host();
7062 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7063 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7064
7065 // Pack & Prepare w/ owning PIDs
7066 packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7067 numExportPacketsPerLID, ExportLIDs,
7068 SourcePids, constantNumPackets);
7069 }
7070
7071 // Do the exchange of remote data.
7072#ifdef HAVE_TPETRA_MMM_TIMINGS
7073 MM = Teuchos::null;
7074 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7075#endif
7076
7077 if (communication_needed) {
7078 if (reverseMode) {
7079 if (constantNumPackets == 0) { // variable number of packets per LID
7080 // Make sure that host has the latest version, since we're
7081 // using the version on host. If host has the latest
7082 // version, syncing to host does nothing.
7083 destGraph->numExportPacketsPerLID_.sync_host();
7084 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7085 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7086 destGraph->numImportPacketsPerLID_.sync_host();
7087 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7088 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7089
7090 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7091 destGraph->numImportPacketsPerLID_.view_host());
7092 size_t totalImportPackets = 0;
7093 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7094 totalImportPackets += numImportPacketsPerLID[i];
7095 }
7096
7097 // Reallocation MUST go before setting the modified flag,
7098 // because it may clear out the flags.
7099 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7100 destGraph->imports_.modify_host();
7101 auto hostImports = destGraph->imports_.view_host();
7102 // This is a legacy host pack/unpack path, so use the host
7103 // version of exports_.
7104 destGraph->exports_.sync_host();
7105 auto hostExports = destGraph->exports_.view_host();
7106 Distor.doReversePostsAndWaits(hostExports,
7107 numExportPacketsPerLID,
7108 hostImports,
7109 numImportPacketsPerLID);
7110 }
7111 else { // constant number of packets per LI
7112 destGraph->imports_.modify_host();
7113 auto hostImports = destGraph->imports_.view_host();
7114 // This is a legacy host pack/unpack path, so use the host
7115 // version of exports_.
7116 destGraph->exports_.sync_host();
7117 auto hostExports = destGraph->exports_.view_host();
7118 Distor.doReversePostsAndWaits(hostExports,
7119 constantNumPackets,
7120 hostImports);
7121 }
7122 }
7123 else { // forward mode (the default)
7124 if (constantNumPackets == 0) { // variable number of packets per LID
7125 // Make sure that host has the latest version, since we're
7126 // using the version on host. If host has the latest
7127 // version, syncing to host does nothing.
7128 destGraph->numExportPacketsPerLID_.sync_host();
7129 destGraph->numImportPacketsPerLID_.sync_host();
7130 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7131 destGraph->numImportPacketsPerLID_.view_host());
7132
7133 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7134 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7135 size_t totalImportPackets = 0;
7136 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7137 totalImportPackets += numImportPacketsPerLID[i];
7138 }
7139
7140 // Reallocation MUST go before setting the modified flag,
7141 // because it may clear out the flags.
7142 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7143 destGraph->imports_.modify_host();
7144 auto hostImports = destGraph->imports_.view_host();
7145 // This is a legacy host pack/unpack path, so use the host
7146 // version of exports_.
7147 destGraph->exports_.sync_host();
7148 auto hostExports = destGraph->exports_.view_host();
7149 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7150 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7151 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
7152 }
7153 else { // constant number of packets per LID
7154 destGraph->imports_.modify_host();
7155 auto hostImports = destGraph->imports_.view_host();
7156 // This is a legacy host pack/unpack path, so use the host
7157 // version of exports_.
7158 destGraph->exports_.sync_host();
7159 auto hostExports = destGraph->exports_.view_host();
7160 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
7161 }
7162 }
7163 }
7164
7165 /*********************************************************************/
7166 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7167 /*********************************************************************/
7168
7169#ifdef HAVE_TPETRA_MMM_TIMINGS
7170 MM = Teuchos::null;
7171 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7172#endif
7173
7174 // Backwards compatibility measure. We'll use this again below.
7175 destGraph->numImportPacketsPerLID_.sync_host();
7176 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7177 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7178 destGraph->imports_.sync_host();
7179 Teuchos::ArrayView<const packet_type> hostImports =
7180 getArrayViewFromDualView(destGraph->imports_);
7181 size_t mynnz =
7182 unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7183 numImportPacketsPerLID,
7184 constantNumPackets, INSERT,
7185 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7186 size_t N = BaseRowMap->getLocalNumElements();
7187
7188 // Allocations
7189 ArrayRCP<size_t> CSR_rowptr(N+1);
7190 ArrayRCP<GO> CSR_colind_GID;
7191 ArrayRCP<LO> CSR_colind_LID;
7192 CSR_colind_GID.resize(mynnz);
7193
7194 // If LO and GO are the same, we can reuse memory when
7195 // converting the column indices from global to local indices.
7196 if (typeid(LO) == typeid(GO)) {
7197 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7198 }
7199 else {
7200 CSR_colind_LID.resize(mynnz);
7201 }
7202
7203 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7204 // unpackAndCombine method on a "CrsArrays" object? This passing
7205 // in a huge list of arrays is icky. Can't we have a bit of an
7206 // abstraction? Implementing a concrete DistObject subclass only
7207 // takes five methods.
7208 unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7209 numImportPacketsPerLID, constantNumPackets,
7210 INSERT, NumSameIDs, PermuteToLIDs,
7211 PermuteFromLIDs, N, mynnz, MyPID,
7212 CSR_rowptr(), CSR_colind_GID(),
7213 SourcePids(), TargetPids);
7214
7215 /**************************************************************/
7216 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7217 /**************************************************************/
7218#ifdef HAVE_TPETRA_MMM_TIMINGS
7219 MM = Teuchos::null;
7220 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7221#endif
7222 // Call an optimized version of makeColMap that avoids the
7223 // Directory lookups (since the Import object knows who owns all
7224 // the GIDs).
7225 Teuchos::Array<int> RemotePids;
7227 CSR_colind_LID(),
7228 CSR_colind_GID(),
7229 BaseDomainMap,
7230 TargetPids, RemotePids,
7231 MyColMap);
7232
7233 /*******************************************************/
7234 /**** 4) Second communicator restriction phase ****/
7235 /*******************************************************/
7236 if (restrictComm) {
7237 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7238 ReducedRowMap :
7239 MyColMap->replaceCommWithSubset(ReducedComm);
7240 MyColMap = ReducedColMap; // Reset the "my" maps
7241 }
7242
7243 // Replace the col map
7244 destGraph->replaceColMap(MyColMap);
7245
7246 // Short circuit if the processor is no longer in the communicator
7247 //
7248 // NOTE: Epetra replaces modifies all "removed" processes so they
7249 // have a dummy (serial) Map that doesn't touch the original
7250 // communicator. Duplicating that here might be a good idea.
7251 if (ReducedComm.is_null()) {
7252 return;
7253 }
7254
7255 /***************************************************/
7256 /**** 5) Sort ****/
7257 /***************************************************/
7258 if ((! reverseMode && xferAsImport != nullptr) ||
7259 (reverseMode && xferAsExport != nullptr)) {
7260 Import_Util::sortCrsEntries(CSR_rowptr(),
7261 CSR_colind_LID());
7262 }
7263 else if ((! reverseMode && xferAsExport != nullptr) ||
7264 (reverseMode && xferAsImport != nullptr)) {
7266 CSR_colind_LID());
7267 if (CSR_rowptr[N] != mynnz) {
7268 CSR_colind_LID.resize(CSR_rowptr[N]);
7269 }
7270 }
7271 else {
7272 TEUCHOS_TEST_FOR_EXCEPTION(
7273 true, std::logic_error,
7274 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7275 }
7276 /***************************************************/
7277 /**** 6) Reset the colmap and the arrays ****/
7278 /***************************************************/
7279
7280 // Call constructor for the new graph (restricted as needed)
7281 //
7282 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7283
7284 /***************************************************/
7285 /**** 7) Build Importer & Call ESFC ****/
7286 /***************************************************/
7287 // Pre-build the importer using the existing PIDs
7288 Teuchos::ParameterList esfc_params;
7289#ifdef HAVE_TPETRA_MMM_TIMINGS
7290 MM = Teuchos::null;
7291 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7292#endif
7293 RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7294#ifdef HAVE_TPETRA_MMM_TIMINGS
7295 MM = Teuchos::null;
7296 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7297
7298 esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7299#endif
7300 if(!params.is_null())
7301 esfc_params.set("compute global constants",params->get("compute global constants",true));
7302
7303 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7304 MyImport, Teuchos::null, rcp(&esfc_params,false));
7305
7306 }
7307
7308 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7309 void
7311 importAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7312 const import_type& importer,
7313 const Teuchos::RCP<const map_type>& domainMap,
7314 const Teuchos::RCP<const map_type>& rangeMap,
7315 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7316 {
7317 transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7318 }
7319
7320 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7321 void
7323 importAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7324 const import_type& rowImporter,
7325 const import_type& domainImporter,
7326 const Teuchos::RCP<const map_type>& domainMap,
7327 const Teuchos::RCP<const map_type>& rangeMap,
7328 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7329 {
7330 transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7331 }
7332
7333 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7334 void
7336 exportAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7337 const export_type& exporter,
7338 const Teuchos::RCP<const map_type>& domainMap,
7339 const Teuchos::RCP<const map_type>& rangeMap,
7340 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7341 {
7342 transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7343 }
7344
7345 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7346 void
7348 exportAndFillComplete(Teuchos::RCP<CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >& destGraph,
7349 const export_type& rowExporter,
7350 const export_type& domainExporter,
7351 const Teuchos::RCP<const map_type>& domainMap,
7352 const Teuchos::RCP<const map_type>& rangeMap,
7353 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7354 {
7355 transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7356 }
7357
7358
7359 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7360 void
7362 swap(CrsGraph<LocalOrdinal, GlobalOrdinal, Node>& graph)
7363 {
7364 std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
7365
7366 std::swap(graph.rowMap_, this->rowMap_);
7367 std::swap(graph.colMap_, this->colMap_);
7368 std::swap(graph.rangeMap_, this->rangeMap_);
7369 std::swap(graph.domainMap_, this->domainMap_);
7370
7371 std::swap(graph.importer_, this->importer_);
7372 std::swap(graph.exporter_, this->exporter_);
7373
7374 std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7375
7376 std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7377 std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7378
7379 std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7380
7381 std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
7382 std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
7383
7384 std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
7385 std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
7386 std::swap(graph.packedUnpackedRowPtrsMatch_, this->packedUnpackedRowPtrsMatch_);
7387
7388 std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
7389
7390 std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
7391 std::swap(graph.gblInds_wdv, this->gblInds_wdv);
7392 std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
7393
7394 std::swap(graph.storageStatus_, this->storageStatus_);
7395
7396 std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7397 std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7398 std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7399 std::swap(graph.fillComplete_, this->fillComplete_);
7400 std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7401 std::swap(graph.noRedundancies_, this->noRedundancies_);
7402 std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7403 std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7404 std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
7405
7406 std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7407
7408 std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7409 std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7410 std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7411 }
7412
7413
7414 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7415 bool
7417 isIdenticalTo(const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> & graph) const
7418 {
7419 auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7420 bool output = true;
7421 output = m1.size() == m2.size() ? output : false;
7422 for(auto & it_m: m1)
7423 {
7424 size_t key = it_m.first;
7425 output = m2.find(key) != m2.end() ? output : false;
7426 if(output)
7427 {
7428 auto v1 = m1.find(key)->second;
7429 auto v2 = m2.find(key)->second;
7430 std::sort(v1.begin(), v1.end());
7431 std::sort(v2.begin(), v2.end());
7432
7433 output = v1.size() == v2.size() ? output : false;
7434 for(size_t i=0; output && i<v1.size(); i++)
7435 {
7436 output = v1[i]==v2[i] ? output : false;
7437 }
7438 }
7439 }
7440 return output;
7441 };
7442
7443 bool output = true;
7444
7445 output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7446 output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7447 output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7448 output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7449
7450 output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7451
7452 output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7453 output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7454
7455 output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7456
7457 output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7458
7459 output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7460 output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7461 output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7462 output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7463 output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7464 output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7465 output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7466 output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7467 output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7469
7470 // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7471 // nonlocals_ isa std::map<GO, std::vector<GO> >
7472 output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7473
7474 // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7475 // - since this is a HostMirror type, it should be in host memory already
7476 output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7477 if(output && this->k_numAllocPerRow_.extent(0) > 0)
7478 {
7479 for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7480 output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7481 }
7482
7483 // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7484 // - since this is a HostMirror type, it should be in host memory already
7485 output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7486 if(output && this->k_numRowEntries_.extent(0) > 0)
7487 {
7488 for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7489 output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7490 }
7491
7492 // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7493 {
7494 auto rowPtrsThis = this->getRowPtrsUnpackedHost();
7495 auto rowPtrsGraph = graph.getRowPtrsUnpackedHost();
7496 output = rowPtrsThis .extent(0) == rowPtrsGraph.extent(0) ? output : false;
7497 for(size_t i=0; output && i< rowPtrsThis.extent(0); i++)
7498 output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7499 }
7500
7501 // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7502 output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7503 if(output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7504 {
7505 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7506 auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7507 for(size_t i=0; output && i < indThis.extent(0); i++)
7508 output = indThis(i) == indGraph(i) ? output : false;
7509 }
7510
7511 // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7512 output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7513 if(output && this->gblInds_wdv.extent(0) > 0)
7514 {
7515 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7516 auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7517 for(size_t i=0; output && i<indtThis.extent(0); i++)
7518 output = indtThis(i) == indtGraph(i) ? output : false;
7519 }
7520
7521 // Check lclGraph_ isa
7522 // Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7523 // Kokkos::StaticCrsGraph has 3 data members in it:
7524 // Kokkos::View<size_type*, ...> row_map
7525 // (local_graph_device_type::row_map_type)
7526 // Kokkos::View<data_type*, ...> entries
7527 // (local_graph_device_type::entries_type)
7528 // Kokkos::View<size_type*, ...> row_block_offsets
7529 // (local_graph_device_type::row_block_type)
7530 // There is currently no Kokkos::StaticCrsGraph comparison function
7531 // that's built-in, so we will just compare
7532 // the three data items here. This can be replaced if Kokkos ever
7533 // puts in its own comparison routine.
7534 local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7535 local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7536
7537 output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7538 ? output : false;
7539 if(output && thisLclGraph.row_map.extent(0) > 0)
7540 {
7541 auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7542 auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7543 for (size_t i=0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7544 output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7545 ? output : false;
7546 }
7547
7548 output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7549 ? output : false;
7550 if(output && thisLclGraph.entries.extent(0) > 0)
7551 {
7552 auto lclGraph_entries_host_this = thisLclGraph.entries;
7553 auto lclGraph_entries_host_graph = graphLclGraph.entries;
7554 for (size_t i=0; output && i < lclGraph_entries_host_this.extent(0); i++)
7555 output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7556 ? output : false;
7557 }
7558
7559 output =
7560 thisLclGraph.row_block_offsets.extent(0) ==
7561 graphLclGraph.row_block_offsets.extent(0) ? output : false;
7562 if(output && thisLclGraph.row_block_offsets.extent(0) > 0)
7563 {
7564 auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7565 auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7566 for (size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7567 output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7568 ? output : false;
7569 }
7570
7571 // For Importer and Exporter, we don't need to explicitly check them since
7572 // they will be consistent with the maps.
7573 // Note: importer_ isa Teuchos::RCP<const import_type>
7574 // exporter_ isa Teuchos::RCP<const export_type>
7575
7576 return output;
7577 }
7578
7579
7580
7581} // namespace Tpetra
7582
7583//
7584// Explicit instantiation macros
7585//
7586// Must be expanded from within the Tpetra namespace!
7587//
7588
7589#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7590 template<> \
7591 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7592 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7593 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7594 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7595 CrsGraph<LO,GO,NODE>::node_type>& importer, \
7596 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7597 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7598 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7599 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7600 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7601 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7602 const Teuchos::RCP<Teuchos::ParameterList>& params);
7603
7604#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7605 template<> \
7606 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7607 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7608 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7609 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7610 CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7611 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7612 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7613 CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7614 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7615 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7616 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7617 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7618 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7619 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7620 const Teuchos::RCP<Teuchos::ParameterList>& params);
7621
7622
7623#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7624 template<> \
7625 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7626 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7627 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7628 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7629 CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7630 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7631 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7632 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7633 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7634 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7635 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7636 const Teuchos::RCP<Teuchos::ParameterList>& params);
7637
7638#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7639 template<> \
7640 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7641 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7642 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7643 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7644 CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7645 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7646 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7647 CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7648 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7649 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7650 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7651 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7652 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7653 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7654 const Teuchos::RCP<Teuchos::ParameterList>& params);
7655
7656
7657#define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7658 template class CrsGraph<LO, GO, NODE>; \
7659 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7660 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7661 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7662 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7663
7664
7665#endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowptr, const Teuchos::ArrayView< LocalOrdinal > &colind_LID, const Teuchos::ArrayView< GlobalOrdinal > &colind_GID, const Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMapRCP, const Teuchos::ArrayView< const int > &owningPIDs, Teuchos::Array< int > &remotePIDs, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap)
lowCommunicationMakeColMapAndReindex
void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
void getPids(const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer, Teuchos::Array< int > &pids, bool use_minus_one_for_local)
Like getPidGidPairs, but just gets the PIDs, ordered by the column Map.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
::Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > import_type
The Import specialization used by this class.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indicies.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Create a cloned CrsGraph for a different Node type.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
const row_ptrs_device_view_type & getRowPtrsUnpackedDevice() const
Get the unpacked row pointers on device.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indicies.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
::Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > map_type
The Map specialization used by this class.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
typename device_type::execution_space execution_space
This class' Kokkos execution space.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
global_size_t globalNumEntries_
Global number of entries in the graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
::Tpetra::Export< LocalOrdinal, GlobalOrdinal, Node > export_type
The Export specialization used by this class.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true Built during fillCo...
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
const row_ptrs_host_view_type & getRowPtrsPackedHost() const
Get the packed row pointers on host. Lazily make a copy from device.
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
Kokkos::View< constsize_t *, device_type >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that we made a local modification to its structure.
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
const row_ptrs_device_view_type & getRowPtrsPackedDevice() const
Get the packed row pointers on device.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
bool haveLocalConstants_
Whether this process has computed local constants.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Sets up and executes a communication plan for a Tpetra DistObject.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
An abstract interface for graphs accessed by rows.
virtual bool isFillComplete() const =0
Whether fillComplete() has been called (without an intervening resumeFill()).
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes this graph's distribution of rows over processes.
virtual void getGlobalRowCopy(const GlobalOrdinal gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const =0
Get a copy of the global column indices in a given row of the graph.
virtual size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const =0
Returns the current number of entries on this node in the specified global row.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
Nonmember function that computes a residual Computes R = B - A * X.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void disableWDVTracking()
Disable WrappedDualView reference-count tracking and syncing. Call this before entering a host-parall...
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void enableWDVTracking()
Enable WrappedDualView reference-count tracking and syncing. Call this after exiting a host-parallel ...
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.