Tpetra_Import_Util2.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_IMPORT_UTIL2_HPP
11#define TPETRA_IMPORT_UTIL2_HPP
12
17
18#include "Tpetra_ConfigDefs.hpp"
19#include "Tpetra_Import.hpp"
20#include "Tpetra_HashTable.hpp"
21#include "Tpetra_Map.hpp"
22#include "Tpetra_Util.hpp"
23#include "Tpetra_Distributor.hpp"
26#include "Tpetra_Vector.hpp"
27#include "Kokkos_DualView.hpp"
28#include "KokkosSparse_SortCrs.hpp"
29#include <Teuchos_Array.hpp>
31#include <Kokkos_UnorderedMap.hpp>
32#include <unordered_map>
33#include <utility>
34#include <set>
35
37
38#include <Kokkos_Core.hpp>
39#include <Kokkos_Sort.hpp>
40
41namespace Tpetra {
42namespace Import_Util {
43
46template<typename Scalar, typename Ordinal>
47void
48sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
49 const Teuchos::ArrayView<Ordinal>& CRS_colind,
50 const Teuchos::ArrayView<Scalar>&CRS_vals);
51
52template<typename Ordinal>
53void
54sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
55 const Teuchos::ArrayView<Ordinal>& CRS_colind);
56
57template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
58void
59sortCrsEntries (const rowptr_array_type& CRS_rowptr,
60 const colind_array_type& CRS_colind,
61 const vals_array_type& CRS_vals);
62
63template<typename rowptr_array_type, typename colind_array_type>
64void
65sortCrsEntries (const rowptr_array_type& CRS_rowptr,
66 const colind_array_type& CRS_colind);
67
72template<typename Scalar, typename Ordinal>
73void
74sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
75 const Teuchos::ArrayView<Ordinal>& CRS_colind,
76 const Teuchos::ArrayView<Scalar>& CRS_vals);
77
78template<typename Ordinal>
79void
80sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
81 const Teuchos::ArrayView<Ordinal>& CRS_colind);
82
83template<class rowptr_view_type, class colind_view_type, class vals_view_type>
84void
85sortAndMergeCrsEntries (const rowptr_view_type& CRS_rowptr,
86 const colind_view_type& CRS_colind,
87 const vals_view_type& CRS_vals);
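// Example (illustrative): for a two-row CRS structure with
//   rowptr = {0, 2, 4}, colind = {3, 1, 2, 0}, vals = {a, b, c, d},
// sortCrsEntries reorders each row to give colind = {1, 3, 0, 2} and
// vals = {b, a, d, c}; sortAndMergeCrsEntries additionally sums the values of
// duplicate column indices within a row and compacts rowptr/colind/vals.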
88
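// Build a column Map, and reindex the column indices to it, from CRS row
// offsets, column indices given as global IDs, the domain Map, and the owning
// process of each column entry, all without additional communication.  On
// return, colind_LID holds local indices relative to the new colMap and
// remotePIDs lists the owning processes of the remote (off-process) columns.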
104template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
 105void
 106lowCommunicationMakeColMapAndReindex (
 107 const Teuchos::ArrayView<const size_t> &rowptr,
108 const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
109 const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
110 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
111 const Teuchos::ArrayView<const int> &owningPIDs,
112 Teuchos::Array<int> &remotePIDs,
113 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
114
119template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
 120void
 121lowCommunicationMakeColMapAndReindex (
 122 const Kokkos::View<size_t*,typename Node::device_type> rowptr_view,
123 const Kokkos::View<LocalOrdinal*,typename Node::device_type> colind_LID_view,
124 const Kokkos::View<GlobalOrdinal*,typename Node::device_type> colind_GID_view,
125 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
 126 const Kokkos::View<int*,typename Node::device_type> owningPIDs_view,
127 Teuchos::Array<int> &remotePIDs,
128 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
129
143 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
144 void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
145 bool useReverseModeForOwnership,
146 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
147 bool useReverseModeForMigration,
 148 Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> & owningPIDs);
 149
150} // namespace Import_Util
151} // namespace Tpetra
152
153
154//
155// Implementations
156//
157
158namespace Tpetra {
159namespace Import_Util {
160
161
162template<typename PID, typename GlobalOrdinal>
163bool sort_PID_then_GID(const std::pair<PID,GlobalOrdinal> &a,
164 const std::pair<PID,GlobalOrdinal> &b)
165{
166 if(a.first!=b.first)
167 return (a.first < b.first);
168 return (a.second < b.second);
169}
170
171template<typename PID,
172 typename GlobalOrdinal,
173 typename LocalOrdinal>
174bool sort_PID_then_pair_GID_LID(const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &a,
175 const std::pair<PID, std::pair< GlobalOrdinal, LocalOrdinal > > &b)
176{
177 if(a.first!=b.first)
178 return a.first < b.first;
179 else
180 return (a.second.first < b.second.first);
181}
182
183template<typename Scalar,
184 typename LocalOrdinal,
185 typename GlobalOrdinal,
186 typename Node>
187void
188reverseNeighborDiscovery(const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> & SourceMatrix,
189 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type & rowptr,
190 const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type & colind,
191 const Tpetra::Details::Transfer<LocalOrdinal,GlobalOrdinal,Node>& RowTransfer,
192 Teuchos::RCP<const Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > MyImporter,
193 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MyDomainMap,
194 Teuchos::ArrayRCP<int>& type3PIDs,
195 Teuchos::ArrayRCP<LocalOrdinal>& type3LIDs,
196 Teuchos::RCP<const Teuchos::Comm<int> >& rcomm)
197{
198#ifdef HAVE_TPETRACORE_MPI
199 using Teuchos::TimeMonitor;
200 using ::Tpetra::Details::Behavior;
201 typedef LocalOrdinal LO;
202 typedef GlobalOrdinal GO;
203 typedef std::pair<GO,GO> pidgidpair_t;
204 using Teuchos::RCP;
205 const std::string prefix {" Import_Util2::ReverseND:: "};
206 const std::string label ("IU2::Neighbor");
207
208 // There can be no neighbor discovery if you don't have an importer
209 if(MyImporter.is_null()) return;
210
211 std::ostringstream errstr;
212 bool error = false;
213 auto const comm = MyDomainMap->getComm();
214
215 MPI_Comm rawComm = getRawMpiComm(*comm);
216 const int MyPID = rcomm->getRank ();
217
218 // Things related to messages I am sending in forward mode (RowTransfer)
219 // *** Note: this will be incorrect for transferAndFillComplete if it is in reverse mode. FIXME cbl.
220 auto ExportPIDs = RowTransfer.getExportPIDs();
221 auto ExportLIDs = RowTransfer.getExportLIDs();
222 auto NumExportLIDs = RowTransfer.getNumExportIDs();
223
224 Distributor & Distor = MyImporter->getDistributor();
225 const size_t NumRecvs = Distor.getNumReceives();
226 const size_t NumSends = Distor.getNumSends();
227 auto RemoteLIDs = MyImporter->getRemoteLIDs();
228 auto const ProcsFrom = Distor.getProcsFrom();
229 auto const ProcsTo = Distor.getProcsTo();
230
231 auto LengthsFrom = Distor.getLengthsFrom();
232 auto MyColMap = SourceMatrix.getColMap();
233 const size_t numCols = MyColMap->getLocalNumElements ();
234 RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > target = MyImporter->getTargetMap();
235
236 // Get the owning pids in a special way,
 237 // s.t. ProcsFrom[RemotePIDOrder[lid]] is the proc that owns remote LID lid.
238 Teuchos::Array<int> RemotePIDOrder(numCols,-1);
239
240 // For each remote ID, record index into ProcsFrom, who owns it.
241 for (size_t i = 0, j = 0; i < NumRecvs; ++i) {
242 for (size_t k = 0; k < LengthsFrom[i]; ++k) {
243 const int pid = ProcsFrom[i];
244 if (pid != MyPID) {
245 RemotePIDOrder[RemoteLIDs[j]] = i;
246 }
247 j++;
248 }
249 }
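 // After this loop, RemotePIDOrder[lid] is the index into ProcsFrom of the
 // process that owns remote column lid; columns not received from another
 // process keep the value -1.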
250
 251 // Step One: Start tacking the (GID,PID) pairs onto the std::sets
252 //
253 // For each index in ProcsFrom, we will insert into a set of (PID,
254 // GID) pairs, in order to build a list of such pairs for each of
255 // those processes. Since this is building a reverse, we will send
256 // to these processes.
257 Teuchos::Array<int> ReverseSendSizes(NumRecvs,0);
258 // do this as C array to avoid Teuchos::Array value initialization of all reserved memory
259 Teuchos::Array< Teuchos::ArrayRCP<pidgidpair_t > > RSB(NumRecvs);
260
261 {
262#ifdef HAVE_TPETRA_MMM_TIMINGS
263 TimeMonitor set_all(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSB")));
264#endif
265
266 // 25 Jul 2018: CBL
267 // todo:std::unordered_set (hash table),
268 // with an adequate prereservation ("bucket count").
 269 // An unordered_set has to have a custom hasher for the pid/gid pair.
270 // However, when pidsets is copied to RSB, it will be in key
271 // order _not_ in pid,gid order. (unlike std::set).
 272 // Implement this with a reserve, and time BOTH building pidsets
273 // _and_ the sort after the receive. Even if unordered_set saves
274 // time, if it causes the sort to be longer, it's not a win.
275
276 Teuchos::Array<std::set<pidgidpair_t>> pidsets(NumRecvs);
277 {
278#ifdef HAVE_TPETRA_MMM_TIMINGS
279 TimeMonitor set_insert(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBinsert")));
280#endif
281 for(size_t i=0; i < NumExportLIDs; i++) {
282 LO lid = ExportLIDs[i];
283 GO exp_pid = ExportPIDs[i];
284 for(auto j=rowptr[lid]; j<rowptr[lid+1]; j++){
285 int pid_order = RemotePIDOrder[colind[j]];
286 if(pid_order!=-1) {
287 GO gid = MyColMap->getGlobalElement(colind[j]); //Epetra SM.GCID46 =>sm->graph-> {colmap(colind)}
288 auto tpair = pidgidpair_t(exp_pid,gid);
289 pidsets[pid_order].insert(pidsets[pid_order].end(),tpair);
290 }
291 }
292 }
293 }
294
295 {
296#ifdef HAVE_TPETRA_MMM_TIMINGS
297 TimeMonitor set_cpy(*TimeMonitor::getNewTimer(prefix + std::string("isMMallSetRSBcpy")));
298#endif
299 int jj = 0;
300 for(auto &&ps : pidsets) {
301 auto s = ps.size();
302 RSB[jj] = Teuchos::arcp(new pidgidpair_t[ s ],0, s ,true);
303 std::copy(ps.begin(),ps.end(),RSB[jj]);
304 ReverseSendSizes[jj]=s;
305 ++jj;
306 }
307 }
308 } // end of set based packing.
309
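 // First round of exchange: post one MPI_Irecv per process we normally send
 // to (ProcsTo) and one MPI_Isend per process we normally receive from
 // (ProcsFrom), so each neighbor learns how many (PID,GID) pairs to expect.
 // The pairs themselves are exchanged in the second round below.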
310 Teuchos::Array<int> ReverseRecvSizes(NumSends,-1);
311 Teuchos::Array<MPI_Request> rawBreq(ProcsFrom.size()+ProcsTo.size(), MPI_REQUEST_NULL);
312 // 25 Jul 2018: MPI_TAG_UB is the largest tag value; could be < 32768.
313 const int mpi_tag_base_ = 3;
314
315 int mpireq_idx=0;
316 for(int i=0;i<ProcsTo.size();++i) {
317 int Rec_Tag = mpi_tag_base_ + ProcsTo[i];
318 int * thisrecv = (int *) (&ReverseRecvSizes[i]);
319 MPI_Request rawRequest = MPI_REQUEST_NULL;
320 MPI_Irecv(const_cast<int*>(thisrecv),
321 1,
322 MPI_INT,
323 ProcsTo[i],
324 Rec_Tag,
325 rawComm,
326 &rawRequest);
327 rawBreq[mpireq_idx++]=rawRequest;
328 }
329 for(int i=0;i<ProcsFrom.size();++i) {
330 int Send_Tag = mpi_tag_base_ + MyPID;
331 int * mysend = ( int *)(&ReverseSendSizes[i]);
332 MPI_Request rawRequest = MPI_REQUEST_NULL;
333 MPI_Isend(mysend,
334 1,
335 MPI_INT,
336 ProcsFrom[i],
337 Send_Tag,
338 rawComm,
339 &rawRequest);
340 rawBreq[mpireq_idx++]=rawRequest;
341 }
342 Teuchos::Array<MPI_Status> rawBstatus(rawBreq.size());
343#ifdef HAVE_TPETRA_DEBUG
344 const int err1 =
345#endif
346 MPI_Waitall (rawBreq.size(), rawBreq.getRawPtr(),
347 rawBstatus.getRawPtr());
348
349
350#ifdef HAVE_TPETRA_DEBUG
351 if(err1) {
352 errstr <<MyPID<< "sE1 reverseNeighborDiscovery Mpi_Waitall error on send ";
353 error=true;
354 std::cerr<<errstr.str()<<std::flush;
355 }
356#endif
357
358 int totalexportpairrecsize = 0;
359 for (size_t i = 0; i < NumSends; ++i) {
360 totalexportpairrecsize += ReverseRecvSizes[i];
361#ifdef HAVE_TPETRA_DEBUG
362 if(ReverseRecvSizes[i]<0) {
363 errstr << MyPID << "E4 reverseNeighborDiscovery: got < 0 for receive size "<<ReverseRecvSizes[i]<<std::endl;
364 error=true;
365 }
366#endif
367 }
368 Teuchos::ArrayRCP<pidgidpair_t >AllReverseRecv= Teuchos::arcp(new pidgidpair_t[totalexportpairrecsize],0,totalexportpairrecsize,true);
369 int offset = 0;
370 mpireq_idx=0;
371 for(int i=0;i<ProcsTo.size();++i) {
372 int recv_data_size = ReverseRecvSizes[i]*2;
373 int recvData_MPI_Tag = mpi_tag_base_*2 + ProcsTo[i];
374 MPI_Request rawRequest = MPI_REQUEST_NULL;
375 GO * rec_bptr = (GO*) (&AllReverseRecv[offset]);
376 offset+=ReverseRecvSizes[i];
377 MPI_Irecv(rec_bptr,
378 recv_data_size,
379 ::Tpetra::Details::MpiTypeTraits<GO>::getType(rec_bptr[0]),
380 ProcsTo[i],
381 recvData_MPI_Tag,
382 rawComm,
383 &rawRequest);
384 rawBreq[mpireq_idx++]=rawRequest;
385 }
386 for(int ii=0;ii<ProcsFrom.size();++ii) {
387 GO * send_bptr = (GO*) (RSB[ii].getRawPtr());
388 MPI_Request rawSequest = MPI_REQUEST_NULL;
389 int send_data_size = ReverseSendSizes[ii]*2; // 2 == count of pair
390 int sendData_MPI_Tag = mpi_tag_base_*2+MyPID;
391 MPI_Isend(send_bptr,
392 send_data_size,
393 ::Tpetra::Details::MpiTypeTraits<GO>::getType(send_bptr[0]),
394 ProcsFrom[ii],
395 sendData_MPI_Tag,
396 rawComm,
397 &rawSequest);
398
399 rawBreq[mpireq_idx++]=rawSequest;
400 }
401#ifdef HAVE_TPETRA_DEBUG
402 const int err =
403#endif
404 MPI_Waitall (rawBreq.size(),
405 rawBreq.getRawPtr(),
406 rawBstatus.getRawPtr());
407#ifdef HAVE_TPETRA_DEBUG
408 if(err) {
409 errstr <<MyPID<< "E3.r reverseNeighborDiscovery Mpi_Waitall error on receive ";
410 error=true;
411 std::cerr<<errstr.str()<<std::flush;
412 }
413#endif
414 std::sort(AllReverseRecv.begin(), AllReverseRecv.end(), Tpetra::Import_Util::sort_PID_then_GID<GlobalOrdinal, GlobalOrdinal>);
415
416 auto newEndOfPairs = std::unique(AllReverseRecv.begin(), AllReverseRecv.end());
417// don't resize to remove non-unique, just use the end-of-unique iterator
418 if(AllReverseRecv.begin() == newEndOfPairs) return;
419 int ARRsize = std::distance(AllReverseRecv.begin(),newEndOfPairs);
420 auto rPIDs = Teuchos::arcp(new int[ARRsize],0,ARRsize,true);
421 auto rLIDs = Teuchos::arcp(new LocalOrdinal[ARRsize],0,ARRsize,true);
422
423 int tsize=0;
424 for(auto itr = AllReverseRecv.begin(); itr!=newEndOfPairs; ++itr ) {
425 if((int)(itr->first) != MyPID) {
426 rPIDs[tsize]=(int)itr->first;
427 LocalOrdinal lid = MyDomainMap->getLocalElement(itr->second);
428 rLIDs[tsize]=lid;
429 tsize++;
430 }
431 }
432
433 type3PIDs=rPIDs.persistingView(0,tsize);
434 type3LIDs=rLIDs.persistingView(0,tsize);
435
436 if(error){
437 std::cerr<<errstr.str()<<std::flush;
438 comm->barrier();
439 comm->barrier();
440 comm->barrier();
441 MPI_Abort (MPI_COMM_WORLD, -1);
442 }
443#endif
444}
445
446// Note: This should get merged with the other Tpetra sort routines eventually.
447template<typename Scalar, typename Ordinal>
448void
449sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
450 const Teuchos::ArrayView<Ordinal> & CRS_colind,
451 const Teuchos::ArrayView<Scalar> &CRS_vals)
452{
453 // For each row, sort column entries from smallest to largest.
 454 // Use shell sort, which is fast if the indices are already nearly sorted.
455 // Code copied from Epetra_CrsMatrix::SortEntries()
456 size_t NumRows = CRS_rowptr.size()-1;
457 size_t nnz = CRS_colind.size();
458
459 const bool permute_values_array = CRS_vals.size() > 0;
460
461 for(size_t i = 0; i < NumRows; i++){
462 size_t start=CRS_rowptr[i];
463 if(start >= nnz) continue;
464
465 size_t NumEntries = CRS_rowptr[i+1] - start;
466 Teuchos::ArrayRCP<Scalar> locValues;
467 if (permute_values_array)
468 locValues = Teuchos::arcp<Scalar>(&CRS_vals[start], 0, NumEntries, false);
469 Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[start], 0, NumEntries, false);
470
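 // Shell sort this row's indices (and values, if present) using Knuth's
 // gap sequence h = 3*h + 1, i.e. 1, 4, 13, 40, ...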
471 Ordinal n = NumEntries;
472 Ordinal m = 1;
473 while (m<n) m = m*3+1;
474 m /= 3;
475
476 while(m > 0) {
477 Ordinal max = n - m;
478 for(Ordinal j = 0; j < max; j++) {
479 for(Ordinal k = j; k >= 0; k-=m) {
480 if(locIndices[k+m] >= locIndices[k])
481 break;
482 if (permute_values_array) {
483 Scalar dtemp = locValues[k+m];
484 locValues[k+m] = locValues[k];
485 locValues[k] = dtemp;
486 }
487 Ordinal itemp = locIndices[k+m];
488 locIndices[k+m] = locIndices[k];
489 locIndices[k] = itemp;
490 }
491 }
492 m = m/3;
493 }
494 }
495}
496
497template<typename Ordinal>
498void
499sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
500 const Teuchos::ArrayView<Ordinal> & CRS_colind)
501{
502 // Generate dummy values array
503 Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
504 sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
505}
506
507namespace Impl {
508
509template<class RowOffsetsType, class ColumnIndicesType, class ValuesType>
510class SortCrsEntries {
511private:
512 typedef typename ColumnIndicesType::non_const_value_type ordinal_type;
513 typedef typename ValuesType::non_const_value_type scalar_type;
514
515public:
516 SortCrsEntries (const RowOffsetsType& ptr,
517 const ColumnIndicesType& ind,
518 const ValuesType& val) :
519 ptr_ (ptr),
520 ind_ (ind),
521 val_ (val)
522 {
523 static_assert (std::is_signed<ordinal_type>::value, "The type of each "
524 "column index -- that is, the type of each entry of ind "
525 "-- must be signed in order for this functor to work.");
526 }
527
528 KOKKOS_FUNCTION void operator() (const size_t i) const
529 {
530 const size_t nnz = ind_.extent (0);
531 const size_t start = ptr_(i);
532 const bool permute_values_array = val_.extent(0) > 0;
533
534 if (start < nnz) {
535 const size_t NumEntries = ptr_(i+1) - start;
536
537 const ordinal_type n = static_cast<ordinal_type> (NumEntries);
538 ordinal_type m = 1;
539 while (m<n) m = m*3+1;
540 m /= 3;
541
542 while (m > 0) {
543 ordinal_type max = n - m;
544 for (ordinal_type j = 0; j < max; j++) {
545 for (ordinal_type k = j; k >= 0; k -= m) {
546 const size_t sk = start+k;
547 if (ind_(sk+m) >= ind_(sk)) {
548 break;
549 }
550 if (permute_values_array) {
551 const scalar_type dtemp = val_(sk+m);
552 val_(sk+m) = val_(sk);
553 val_(sk) = dtemp;
554 }
555 const ordinal_type itemp = ind_(sk+m);
556 ind_(sk+m) = ind_(sk);
557 ind_(sk) = itemp;
558 }
559 }
560 m = m/3;
561 }
562 }
563 }
564
565 static void
566 sortCrsEntries (const RowOffsetsType& ptr,
567 const ColumnIndicesType& ind,
568 const ValuesType& val)
569 {
570 // For each row, sort column entries from smallest to largest.
 571 // Use shell sort, which is fast if the indices are already nearly sorted.
572 // Code copied from Epetra_CrsMatrix::SortEntries()
573 // NOTE: This should not be taken as a particularly efficient way to sort
574 // rows of matrices in parallel. But it is correct, so that's something.
575 if (ptr.extent (0) == 0) {
576 return; // no rows, so nothing to sort
577 }
578 const size_t NumRows = ptr.extent (0) - 1;
579
580 typedef size_t index_type; // what this function was using; not my choice
581 typedef typename ValuesType::execution_space execution_space;
582 // Specify RangePolicy explicitly, in order to use correct execution
583 // space. See #1345.
584 typedef Kokkos::RangePolicy<execution_space, index_type> range_type;
585 Kokkos::parallel_for ("sortCrsEntries", range_type (0, NumRows),
586 SortCrsEntries (ptr, ind, val));
587 }
588
589private:
590 RowOffsetsType ptr_;
591 ColumnIndicesType ind_;
592 ValuesType val_;
593};
594
595} // namespace Impl
596
597template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
598void
599sortCrsEntries (const rowptr_array_type& CRS_rowptr,
600 const colind_array_type& CRS_colind,
601 const vals_array_type& CRS_vals)
602{
603 Impl::SortCrsEntries<rowptr_array_type, colind_array_type,
604 vals_array_type>::sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
605}
606
607template<typename rowptr_array_type, typename colind_array_type>
608void
609sortCrsEntries (const rowptr_array_type& CRS_rowptr,
610 const colind_array_type& CRS_colind)
611{
612 // Generate dummy values array
613 typedef typename colind_array_type::execution_space execution_space;
614 typedef Tpetra::Details::DefaultTypes::scalar_type scalar_type;
615 typedef typename Kokkos::View<scalar_type*, execution_space> scalar_view_type;
616 scalar_view_type CRS_vals;
617 sortCrsEntries<rowptr_array_type, colind_array_type,
618 scalar_view_type>(CRS_rowptr, CRS_colind, CRS_vals);
619}
620
621// Note: This should get merged with the other Tpetra sort routines eventually.
622template<typename Scalar, typename Ordinal>
623void
624sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
625 const Teuchos::ArrayView<Ordinal> & CRS_colind,
626 const Teuchos::ArrayView<Scalar> &CRS_vals)
627{
628 // For each row, sort column entries from smallest to largest,
 629 // merging column ids that are identical by adding their values. Use shell
 630 // sort, which is fast if the indices are already nearly sorted.
631 // Code copied from Epetra_CrsMatrix::SortEntries()
632
633 if (CRS_rowptr.size () == 0) {
634 return; // no rows, so nothing to sort
635 }
636 const size_t NumRows = CRS_rowptr.size () - 1;
637 const size_t nnz = CRS_colind.size ();
638 size_t new_curr = CRS_rowptr[0];
639 size_t old_curr = CRS_rowptr[0];
640
641 const bool permute_values_array = CRS_vals.size() > 0;
642
643 for(size_t i = 0; i < NumRows; i++){
644 const size_t old_rowptr_i=CRS_rowptr[i];
645 CRS_rowptr[i] = old_curr;
646 if(old_rowptr_i >= nnz) continue;
647
648 size_t NumEntries = CRS_rowptr[i+1] - old_rowptr_i;
649 Teuchos::ArrayRCP<Scalar> locValues;
650 if (permute_values_array)
651 locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
652 Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
653
654 // Sort phase
655 Ordinal n = NumEntries;
656 Ordinal m = n/2;
657
658 while(m > 0) {
659 Ordinal max = n - m;
660 for(Ordinal j = 0; j < max; j++) {
661 for(Ordinal k = j; k >= 0; k-=m) {
662 if(locIndices[k+m] >= locIndices[k])
663 break;
664 if (permute_values_array) {
665 Scalar dtemp = locValues[k+m];
666 locValues[k+m] = locValues[k];
667 locValues[k] = dtemp;
668 }
669 Ordinal itemp = locIndices[k+m];
670 locIndices[k+m] = locIndices[k];
671 locIndices[k] = itemp;
672 }
673 }
674 m = m/2;
675 }
676
677 // Merge & shrink
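 // Example (illustrative): a sorted row with colind = {2, 2, 5} and
 // vals = {1, 3, 4} is compacted to colind = {2, 5} and vals = {4, 4}.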
678 for(size_t j=old_rowptr_i; j < CRS_rowptr[i+1]; j++) {
679 if(j > old_rowptr_i && CRS_colind[j]==CRS_colind[new_curr-1]) {
680 if (permute_values_array) CRS_vals[new_curr-1] += CRS_vals[j];
681 }
682 else if(new_curr==j) {
683 new_curr++;
684 }
685 else {
686 CRS_colind[new_curr] = CRS_colind[j];
687 if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
688 new_curr++;
689 }
690 }
691 old_curr=new_curr;
692 }
693
694 CRS_rowptr[NumRows] = new_curr;
695}
696
697template<typename Ordinal>
698void
699sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
700 const Teuchos::ArrayView<Ordinal> & CRS_colind)
701{
702 Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
703 return sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
704}
705
706template<class rowptr_view_type, class colind_view_type, class vals_view_type>
707void
708sortAndMergeCrsEntries(rowptr_view_type& CRS_rowptr,
709 colind_view_type& CRS_colind,
710 vals_view_type& CRS_vals)
711{
712 using execution_space = typename vals_view_type::execution_space;
713
714 auto CRS_rowptr_in = CRS_rowptr;
715 auto CRS_colind_in = CRS_colind;
716 auto CRS_vals_in = CRS_vals;
717
718 KokkosSparse::sort_and_merge_matrix<execution_space, rowptr_view_type,
719 colind_view_type, vals_view_type>(CRS_rowptr_in, CRS_colind_in, CRS_vals_in,
720 CRS_rowptr, CRS_colind, CRS_vals);
721}
722
723
724template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
725void
726lowCommunicationMakeColMapAndReindexSerial (const Teuchos::ArrayView<const size_t> &rowptr,
727 const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
728 const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
729 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
730 const Teuchos::ArrayView<const int> &owningPIDs,
731 Teuchos::Array<int> &remotePIDs,
732 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
733{
734 using Teuchos::rcp;
735 typedef LocalOrdinal LO;
736 typedef GlobalOrdinal GO;
737 typedef Tpetra::global_size_t GST;
738 typedef Tpetra::Map<LO, GO, Node> map_type;
739 const char prefix[] = "lowCommunicationMakeColMapAndReindexSerial: ";
740
741 // The domainMap is an RCP because there is a shortcut for a
742 // (common) special case to return the columnMap = domainMap.
743 const map_type& domainMap = *domainMapRCP;
744
745 // Scan all column indices and sort into two groups:
746 // Local: those whose GID matches a GID of the domain map on this processor and
747 // Remote: All others.
748 const size_t numDomainElements = domainMap.getLocalNumElements ();
749 Teuchos::Array<bool> LocalGIDs;
750 if (numDomainElements > 0) {
751 LocalGIDs.resize (numDomainElements, false); // Assume domain GIDs are not local
752 }
753
 754 // In principle it is good to have RemoteGIDs and RemoteGIDList be as
755 // long as the number of remote GIDs on this processor, but this
756 // would require two passes through the column IDs, so we make it
757 // the max of 100 and the number of block rows.
758 //
759 // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
760 // most INT_MAX entries, but it's possible to have more rows than
761 // that (if size_t is 64 bits and int is 32 bits).
762 const size_t numMyRows = rowptr.size () - 1;
763 const int hashsize = std::max (static_cast<int> (numMyRows), 100);
764
765 Tpetra::Details::HashTable<GO, LO> RemoteGIDs (hashsize);
766 Teuchos::Array<GO> RemoteGIDList;
767 RemoteGIDList.reserve (hashsize);
768 Teuchos::Array<int> PIDList;
769 PIDList.reserve (hashsize);
770
771 // Here we start using the *LocalOrdinal* colind_LID array. This is
772 // safe even if both columnIndices arrays are actually the same
 773 // (because LocalOrdinal==GO). For *local* GIDs, set
 774 // colind_LID with their LID in the domainMap. For *remote*
775 // GIDs, we set colind_LID with (numDomainElements+NumRemoteColGIDs)
776 // before the increment of the remote count. These numberings will
777 // be separate because no local LID is greater than
778 // numDomainElements.
779
780 size_t NumLocalColGIDs = 0;
781 LO NumRemoteColGIDs = 0;
782 for (size_t i = 0; i < numMyRows; ++i) {
783 for(size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
784 const GO GID = colind_GID[j];
785 // Check if GID matches a row GID
786 const LO LID = domainMap.getLocalElement (GID);
787 if(LID != -1) {
788 const bool alreadyFound = LocalGIDs[LID];
789 if (! alreadyFound) {
790 LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
791 NumLocalColGIDs++;
792 }
793 colind_LID[j] = LID;
794 }
795 else {
796 const LO hash_value = RemoteGIDs.get (GID);
 797 if (hash_value == -1) { // This means it's a new remote GID
798 const int PID = owningPIDs[j];
799 TEUCHOS_TEST_FOR_EXCEPTION(
800 PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
801 "PID is owned.");
802 colind_LID[j] = static_cast<LO> (numDomainElements + NumRemoteColGIDs);
803 RemoteGIDs.add (GID, NumRemoteColGIDs);
804 RemoteGIDList.push_back (GID);
805 PIDList.push_back (PID);
806 NumRemoteColGIDs++;
807 }
808 else {
809 colind_LID[j] = static_cast<LO> (numDomainElements + hash_value);
810 }
811 }
812 }
813 }
814
815 // Possible short-circuit: If all domain map GIDs are present as
816 // column indices, then set ColMap=domainMap and quit.
817 if (domainMap.getComm ()->getSize () == 1) {
818 // Sanity check: When there is only one process, there can be no
819 // remoteGIDs.
820 TEUCHOS_TEST_FOR_EXCEPTION(
821 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
822 "process in the domain Map's communicator, which means that there are no "
823 "\"remote\" indices. Nevertheless, some column indices are not in the "
824 "domain Map.");
825 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
826 // In this case, we just use the domainMap's indices, which is,
 827 // not coincidentally, what we clobbered colind with up above
828 // anyway. No further reindexing is needed.
829 colMap = domainMapRCP;
830 return;
831 }
832 }
833
834 // Now build the array containing column GIDs
835 // Build back end, containing remote GIDs, first
836 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
837 Teuchos::Array<GO> ColIndices;
838 GO* RemoteColIndices = NULL;
839 if (numMyCols > 0) {
840 ColIndices.resize (numMyCols);
841 if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
842 RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
843 }
844 }
845
846 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
847 RemoteColIndices[i] = RemoteGIDList[i];
848 }
849
850 // Build permute array for *remote* reindexing.
851 Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
852 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
853 RemotePermuteIDs[i]=i;
854 }
855
856 // Sort External column indices so that all columns coming from a
857 // given remote processor are contiguous. This is a sort with two
 858 // auxiliary arrays: RemoteColIndices and RemotePermuteIDs.
859 Tpetra::sort3 (PIDList.begin (), PIDList.end (),
860 ColIndices.begin () + NumLocalColGIDs,
861 RemotePermuteIDs.begin ());
862
863 // Stash the RemotePIDs.
864 //
865 // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
866 // we'd call it here.
867 remotePIDs = PIDList;
868
869 // Sort external column indices so that columns from a given remote
870 // processor are not only contiguous but also in ascending
871 // order. NOTE: I don't know if the number of externals associated
872 // with a given remote processor is known at this point ... so I
873 // count them here.
874
875 // NTS: Only sort the RemoteColIndices this time...
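 // Example (illustrative): remote PIDList = {3, 1, 3, 1} with matching
 // GIDs = {40, 7, 12, 9} ends up as PIDs = {1, 1, 3, 3} and
 // GIDs = {7, 9, 12, 40}: grouped by owning process, ascending by GID within
 // each group, with the permute arrays recording where each entry moved.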
876 LO StartCurrent = 0, StartNext = 1;
877 while (StartNext < NumRemoteColGIDs) {
878 if (PIDList[StartNext]==PIDList[StartNext-1]) {
879 StartNext++;
880 }
881 else {
882 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
883 ColIndices.begin () + NumLocalColGIDs + StartNext,
884 RemotePermuteIDs.begin () + StartCurrent);
885 StartCurrent = StartNext;
886 StartNext++;
887 }
888 }
889 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
890 ColIndices.begin () + NumLocalColGIDs + StartNext,
891 RemotePermuteIDs.begin () + StartCurrent);
892
893 // Reverse the permutation to get the information we actually care about
894 Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
895 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
896 ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
897 }
898
899 // Build permute array for *local* reindexing.
900 bool use_local_permute = false;
901 Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
902
903 // Now fill front end. Two cases:
904 //
905 // (1) If the number of Local column GIDs is the same as the number
906 // of Local domain GIDs, we can simply read the domain GIDs into
907 // the front part of ColIndices, otherwise
908 //
909 // (2) We step through the GIDs of the domainMap, checking to see if
 910 // each domain GID is a column GID. We want to do this to
911 // maintain a consistent ordering of GIDs between the columns
912 // and the domain.
913 Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getLocalElementList();
914 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
915 if (NumLocalColGIDs > 0) {
916 // Load Global Indices into first numMyCols elements column GID list
917 std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
918 ColIndices.begin ());
919 }
920 }
921 else {
922 LO NumLocalAgain = 0;
923 use_local_permute = true;
924 for (size_t i = 0; i < numDomainElements; ++i) {
925 if (LocalGIDs[i]) {
926 LocalPermuteIDs[i] = NumLocalAgain;
927 ColIndices[NumLocalAgain++] = domainGlobalElements[i];
928 }
929 }
930 TEUCHOS_TEST_FOR_EXCEPTION(
931 static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
932 std::runtime_error, prefix << "Local ID count test failed.");
933 }
934
935 // Make column Map
936 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
937 colMap = rcp (new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
938 domainMap.getComm ()));
939
940 // Low-cost reindex of the matrix
941 for (size_t i = 0; i < numMyRows; ++i) {
942 for (size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
943 const LO ID = colind_LID[j];
944 if (static_cast<size_t> (ID) < numDomainElements) {
945 if (use_local_permute) {
946 colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
947 }
948 // In the case where use_local_permute==false, we just copy
949 // the DomainMap's ordering, which it so happens is what we
950 // put in colind_LID to begin with.
951 }
952 else {
953 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
954 }
955 }
956 }
957}
958
959
960template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
 961void
 962lowCommunicationMakeColMapAndReindex (
 963 const Teuchos::ArrayView<const size_t> &rowptr,
964 const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
965 const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
966 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
967 const Teuchos::ArrayView<const int> &owningPIDs,
968 Teuchos::Array<int> &remotePIDs,
969 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
970{
971 using Teuchos::rcp;
972 typedef LocalOrdinal LO;
973 typedef GlobalOrdinal GO;
 974typedef Tpetra::global_size_t GST;
 975typedef Tpetra::Map<LO, GO, Node> map_type;
 976 const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
977
978 typedef typename Node::device_type DT;
979 using execution_space = typename DT::execution_space;
980 execution_space exec;
981 using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
982 typedef typename map_type::local_map_type local_map_type;
983
984 // Create device mirror and host mirror views from function parameters
985 // When we pass in views instead of Teuchos::ArrayViews, we can avoid copying views
986 auto colind_LID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_LID.getRawPtr(), colind_LID.size(), true, "colind_LID");
987 auto rowptr_view = Details::create_mirror_view_from_raw_host_array(exec, rowptr.getRawPtr(), rowptr.size(), true, "rowptr");
988 auto colind_GID_view = Details::create_mirror_view_from_raw_host_array(exec, colind_GID.getRawPtr(), colind_GID.size(), true, "colind_GID");
989 auto owningPIDs_view = Details::create_mirror_view_from_raw_host_array(exec, owningPIDs.getRawPtr(), owningPIDs.size(), true, "owningPIDs");
990
991 typename decltype(colind_LID_view)::HostMirror colind_LID_host(colind_LID.getRawPtr(), colind_LID.size());
992 typename decltype(colind_GID_view)::HostMirror colind_GID_host(colind_GID.getRawPtr(), colind_GID.size());
993
994 Kokkos::deep_copy(colind_LID_view, colind_LID_host);
995 Kokkos::deep_copy(colind_GID_view, colind_GID_host);
996
997 // The domainMap is an RCP because there is a shortcut for a
998 // (common) special case to return the columnMap = domainMap.
999 const map_type& domainMap = *domainMapRCP;
1000
1001 Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID.size());
1002 Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID.size());
1003
1004 const size_t numMyRows = rowptr.size () - 1;
1005 local_map_type domainMap_local = domainMap.getLocalMap();
1006
1007 const size_t numDomainElements = domainMap.getLocalNumElements ();
1008 Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1009 auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1010
1011 size_t NumLocalColGIDs = 0;
1012
1013 // Scan all column indices and sort into two groups:
1014 // Local: those whose GID matches a GID of the domain map on this processor and
1015 // Remote: All others.
1016 // Kokkos::Parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
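 // Each team handles one row: the inner TeamThreadRange reduction counts the
 // row's GIDs that are freshly inserted as local, and team rank 0 adds that
 // partial count to the overall NumLocalColGIDs reduction.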
1017 Kokkos::parallel_reduce(team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type &member, size_t &update) {
1018 const int i = member.league_rank();
1019 size_t NumLocalColGIDs_temp = 0;
1020 size_t rowptr_start = rowptr_view[i];
1021 size_t rowptr_end = rowptr_view[i+1];
1022 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t &innerUpdate) {
1023 const GO GID = colind_GID_view[j];
1024 // Check if GID matches a row GID in local domain map
1025 const LO LID = domainMap_local.getLocalElement (GID);
1026 if (LID != -1) {
1027 auto outcome = LocalGIDs_view_map.insert(LID);
1028 // Fresh insert
1029 if(outcome.success()) {
1030 LocalGIDs_view[LID] = true;
1031 innerUpdate++;
1032 }
1033 }
1034 else {
1035 const int PID = owningPIDs_view[j];
1036 auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1037 if(outcome.success() && PID == -1) {
1038 Kokkos::abort("Cannot figure out if ID is owned.\n");
1039 }
1040 }
1041 }, NumLocalColGIDs_temp);
1042 if(member.team_rank() == 0) update += NumLocalColGIDs_temp;
1043 }, NumLocalColGIDs);
1044
1045 LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1046
1047 Kokkos::View<int*, DT> PIDList_view("PIDList", NumRemoteColGIDs);
1048 auto PIDList_host = Kokkos::create_mirror_view(PIDList_view);
1049
1050 Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1051 auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1052
1053 // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1054 // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1055 Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1056 if(final && RemoteGIDs_view_map.valid_at(i)) {
1057 RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1058 PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1059 }
1060 if(RemoteGIDs_view_map.valid_at(i)) {
1061 update += 1;
1062 }
1063 });
1064
1065 // Possible short-circuit: If all domain map GIDs are present as
1066 // column indices, then set ColMap=domainMap and quit.
1067 if (domainMap.getComm ()->getSize () == 1) {
1068 // Sanity check: When there is only one process, there can be no
1069 // remoteGIDs.
1070 TEUCHOS_TEST_FOR_EXCEPTION(
1071 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1072 "process in the domain Map's communicator, which means that there are no "
1073 "\"remote\" indices. Nevertheless, some column indices are not in the "
1074 "domain Map.");
1075 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1076 // In this case, we just use the domainMap's indices, which is,
 1077 // not coincidentally, what we clobbered colind with up above
1078 // anyway. No further reindexing is needed.
1079 colMap = domainMapRCP;
1080
1081 // Fill out local colMap (which should only contain local GIDs)
1082 auto localColMap = colMap->getLocalMap();
1083 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1084 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1085 });
1086 Kokkos::deep_copy(execution_space(), colind_LID_host, colind_LID_view);
1087 return;
1088 }
1089 }
1090
1091 // Now build the array containing column GIDs
1092 // Build back end, containing remote GIDs, first
1093 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1094 Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1095
1096 // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1097 if(NumRemoteColGIDs > 0) {
1098 if(NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1099 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1100 ColIndices_view[NumLocalColGIDs+i] = RemoteGIDList_view[i];
1101 });
1102 }
1103
1104 // Find the largest PID for bin sorting purposes
1105 int PID_max = 0;
1106 Kokkos::parallel_reduce(Kokkos::RangePolicy<execution_space>(0, PIDList_host.size()), KOKKOS_LAMBDA(const int i, int& max) {
1107 if(max < PIDList_view[i]) max = PIDList_view[i];
1108 }, Kokkos::Max<int>(PID_max));
1109
1110 using KeyViewTypePID = decltype(PIDList_view);
1111 using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1112
1113 // Make a subview of ColIndices for remote GID sorting
1114 auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1115
1116 // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1117 BinSortOpPID binOp2(PID_max+1, 0, PID_max);
1118
1119 // Sort External column indices so that all columns coming from a
1120 // given remote processor are contiguous. This is a sort with one
 1121 // auxiliary array: RemoteColIndices
1122 Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1123 bin_sort2.create_permute_vector(exec);
1124 bin_sort2.sort(exec, PIDList_view);
1125 bin_sort2.sort(exec, ColIndices_subview);
1126
1127 // Deep copy back from device to host
1128 Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1129
1130 // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1131 // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
1132 // we'd call it here.
1133
1134 exec.fence("fence before setting PIDList");
1135 Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1136 for(LO i = 0; i < NumRemoteColGIDs; ++i) {
1137 PIDList[i] = PIDList_host[i];
1138 }
1139
1140 remotePIDs = PIDList;
1141
1142 // Sort external column indices so that columns from a given remote
1143 // processor are not only contiguous but also in ascending
1144 // order. NOTE: I don't know if the number of externals associated
1145 // with a given remote processor is known at this point ... so I
1146 // count them here.
1147 LO StartCurrent = 0, StartNext = 1;
1148 while(StartNext < NumRemoteColGIDs) {
1149 if(PIDList_host[StartNext] == PIDList_host[StartNext-1]) {
1150 StartNext++;
1151 }
1152 else {
1153 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1154 StartCurrent = StartNext;
1155 StartNext++;
1156 }
1157 }
1158
1159 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1160 }
1161
1162
1163 // Build permute array for *local* reindexing.
1164
1165 // Now fill front end. Two cases:
1166 //
1167 // (1) If the number of Local column GIDs is the same as the number
1168 // of Local domain GIDs, we can simply read the domain GIDs into
1169 // the front part of ColIndices, otherwise
1170 //
1171 // (2) We step through the GIDs of the domainMap, checking to see if
 1172 // each domain GID is a column GID. We want to do this to
1173 // maintain a consistent ordering of GIDs between the columns
1174 // and the domain.
1175 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1176 if (NumLocalColGIDs > 0) {
1177 // Load Global Indices into first numMyCols elements column GID list
1178 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1179 ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1180 });
1181 }
1182 }
1183 else {
1184 // This part isn't actually tested in the unit tests
1185 LO NumLocalAgain = 0;
1186 Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1187 if(final && LocalGIDs_view[i]) {
1188 ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1189 }
1190 if(LocalGIDs_view[i]) {
1191 update++;
1192 }
1193 }, NumLocalAgain);
1194
1195
1196 TEUCHOS_TEST_FOR_EXCEPTION(
1197 static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1198 std::runtime_error, prefix << "Local ID count test failed.");
1199 }
1200
1201 // Make column Map
1202 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1203
1204 colMap = rcp (new map_type (minus_one, ColIndices_view, domainMap.getIndexBase (),
1205 domainMap.getComm ()));
1206
1207 // Fill out colind_LID using local map
1208 auto localColMap = colMap->getLocalMap();
1209 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID.size()), KOKKOS_LAMBDA(const int i) {
1210 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1211 });
1212
 1213 // For now, we copy back into colind_LID_host (which also overwrites the colind_LID Teuchos array)
1214 // When colind_LID becomes a Kokkos View we can delete this
1215 Kokkos::deep_copy(exec, colind_LID_host, colind_LID_view);
1216}
1217
1218template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1219void
1220lowCommunicationMakeColMapAndReindex (
1221 const Kokkos::View<size_t*,typename Node::device_type> rowptr_view,
1222 const Kokkos::View<LocalOrdinal*,typename Node::device_type> colind_LID_view,
1223 const Kokkos::View<GlobalOrdinal*,typename Node::device_type> colind_GID_view,
1224 const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
1225 const Kokkos::View<int*,typename Node::device_type> owningPIDs_view,
1226 Teuchos::Array<int> &remotePIDs,
1227 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
1228{
1229 using Teuchos::rcp;
1230 typedef LocalOrdinal LO;
1231 typedef GlobalOrdinal GO;
 1232typedef Tpetra::global_size_t GST;
 1233typedef Tpetra::Map<LO, GO, Node> map_type;
 1234 const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
1235
1236 typedef typename Node::device_type DT;
1237 using execution_space = typename DT::execution_space;
1238 execution_space exec;
1239 using team_policy = Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
1240 typedef typename map_type::local_map_type local_map_type;
1241
1242 // The domainMap is an RCP because there is a shortcut for a
1243 // (common) special case to return the columnMap = domainMap.
1244 const map_type& domainMap = *domainMapRCP;
1245
1246 Kokkos::UnorderedMap<LO, bool, DT> LocalGIDs_view_map(colind_LID_view.size());
1247 Kokkos::UnorderedMap<GO, LO, DT> RemoteGIDs_view_map(colind_LID_view.size());
1248
1249 const size_t numMyRows = rowptr_view.size () - 1;
1250 local_map_type domainMap_local = domainMap.getLocalMap();
1251
1252 const size_t numDomainElements = domainMap.getLocalNumElements ();
1253 Kokkos::View<bool*, DT> LocalGIDs_view("LocalGIDs", numDomainElements);
1254 auto LocalGIDs_host = Kokkos::create_mirror_view(LocalGIDs_view);
1255
1256 size_t NumLocalColGIDs = 0;
1257
1258 // Scan all column indices and sort into two groups:
1259 // Local: those whose GID matches a GID of the domain map on this processor and
1260 // Remote: All others.
1261 // Kokkos::Parallel_reduce sums up NumLocalColGIDs, while we use the size of the Remote GIDs map to find NumRemoteColGIDs
1262 Kokkos::parallel_reduce(team_policy(numMyRows, Kokkos::AUTO), KOKKOS_LAMBDA(const typename team_policy::member_type &member, size_t &update) {
1263 const int i = member.league_rank();
1264 size_t NumLocalColGIDs_temp = 0;
1265 size_t rowptr_start = rowptr_view[i];
1266 size_t rowptr_end = rowptr_view[i+1];
1267 Kokkos::parallel_reduce(Kokkos::TeamThreadRange(member, rowptr_start, rowptr_end), [&](const size_t j, size_t &innerUpdate) {
1268 const GO GID = colind_GID_view[j];
1269 // Check if GID matches a row GID in local domain map
1270 const LO LID = domainMap_local.getLocalElement (GID);
1271 if (LID != -1) {
1272 auto outcome = LocalGIDs_view_map.insert(LID);
1273 // Fresh insert
1274 if(outcome.success()) {
1275 LocalGIDs_view[LID] = true;
1276 innerUpdate++;
1277 }
1278 }
1279 else {
1280 const int PID = owningPIDs_view[j];
1281 auto outcome = RemoteGIDs_view_map.insert(GID, PID);
1282 if(outcome.success() && PID == -1) {
1283 Kokkos::abort("Cannot figure out if ID is owned.\n");
1284 }
1285 }
1286 }, NumLocalColGIDs_temp);
1287 if(member.team_rank() == 0) update += NumLocalColGIDs_temp;
1288 }, NumLocalColGIDs);
1289
1290 LO NumRemoteColGIDs = RemoteGIDs_view_map.size();
1291
1292 Kokkos::View<int*, DT> PIDList_view("PIDList_d", NumRemoteColGIDs);
1293
1294 Kokkos::View<GO*, DT> RemoteGIDList_view("RemoteGIDList", NumRemoteColGIDs);
1295 auto RemoteGIDList_host = Kokkos::create_mirror_view(RemoteGIDList_view);
1296
1297 // For each index in RemoteGIDs_map that contains a GID, use "update" to indicate the number of GIDs "before" this GID
1298 // This maps each element in the RemoteGIDs hash table to an index in RemoteGIDList / PIDList without any overwriting or empty spaces between indices
1299 Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, RemoteGIDs_view_map.capacity()), KOKKOS_LAMBDA(const int i, GO& update, const bool final) {
1300 if(final && RemoteGIDs_view_map.valid_at(i)) {
1301 RemoteGIDList_view[update] = RemoteGIDs_view_map.key_at(i);
1302 PIDList_view[update] = RemoteGIDs_view_map.value_at(i);
1303 }
1304 if(RemoteGIDs_view_map.valid_at(i)) {
1305 update += 1;
1306 }
1307 });
1308
1309 // Possible short-circuit: If all domain map GIDs are present as
1310 // column indices, then set ColMap=domainMap and quit.
1311 if (domainMap.getComm ()->getSize () == 1) {
1312 // Sanity check: When there is only one process, there can be no
1313 // remoteGIDs.
1314 TEUCHOS_TEST_FOR_EXCEPTION(
1315 NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
1316 "process in the domain Map's communicator, which means that there are no "
1317 "\"remote\" indices. Nevertheless, some column indices are not in the "
1318 "domain Map.");
1319 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1320 // In this case, we just use the domainMap's indices, which is,
 1321 // not coincidentally, what we clobbered colind with up above
1322 // anyway. No further reindexing is needed.
1323 colMap = domainMapRCP;
1324
1325 // Fill out local colMap (which should only contain local GIDs)
1326 auto localColMap = colMap->getLocalMap();
1327 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1328 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1329 });
1330 return;
1331 }
1332 }
1333
1334 // Now build the array containing column GIDs
1335 // Build back end, containing remote GIDs, first
1336 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1337 Kokkos::View<GO*, DT> ColIndices_view("ColIndices", numMyCols);
1338
1339 // We don't need to load the backend of ColIndices or sort if there are no remote GIDs
1340 if(NumRemoteColGIDs > 0) {
1341 if(NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1342 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, NumRemoteColGIDs), KOKKOS_LAMBDA(const int i) {
1343 ColIndices_view[NumLocalColGIDs+i] = RemoteGIDList_view[i];
1344 });
1345 }
1346
1347 // Find the largest PID for bin sorting purposes
1348 int PID_max = 0;
1349 Kokkos::parallel_reduce(Kokkos::RangePolicy<execution_space>(0, PIDList_view.size()), KOKKOS_LAMBDA(const int i, int& max) {
1350 if(max < PIDList_view[i]) max = PIDList_view[i];
1351 }, Kokkos::Max<int>(PID_max));
1352
1353 using KeyViewTypePID = decltype(PIDList_view);
1354 using BinSortOpPID = Kokkos::BinOp1D<KeyViewTypePID>;
1355
1356 // Make a subview of ColIndices for remote GID sorting
1357 auto ColIndices_subview = Kokkos::subview(ColIndices_view, Kokkos::make_pair(NumLocalColGIDs, ColIndices_view.size()));
1358
1359 // Make binOp with bins = PID_max + 1, min = 0, max = PID_max
1360 BinSortOpPID binOp2(PID_max+1, 0, PID_max);
1361
1362 // Sort External column indices so that all columns coming from a
1363 // given remote processor are contiguous. This is a sort with one
 1364 // auxiliary array: RemoteColIndices
1365 Kokkos::BinSort<KeyViewTypePID, BinSortOpPID> bin_sort2(PIDList_view, 0, PIDList_view.size(), binOp2, false);
1366 bin_sort2.create_permute_vector(exec);
1367 bin_sort2.sort(exec, PIDList_view);
1368 bin_sort2.sort(exec, ColIndices_subview);
1369
1370 // Deep copy back from device to host
1371 // Stash the RemotePIDs. Once remotePIDs is changed to become a Kokkos view, we can remove this and copy directly.
1372 Teuchos::Array<int> PIDList(NumRemoteColGIDs);
1373 Kokkos::View<int*, Kokkos::HostSpace> PIDList_host(PIDList.data(), PIDList.size());
1374 Kokkos::deep_copy(exec, PIDList_host, PIDList_view);
1375 exec.fence();
1376
1377 remotePIDs = PIDList;
1378
1379 // Sort external column indices so that columns from a given remote
1380 // processor are not only contiguous but also in ascending
1381 // order. NOTE: I don't know if the number of externals associated
1382 // with a given remote processor is known at this point ... so I
1383 // count them here.
1384 LO StartCurrent = 0, StartNext = 1;
1385 while(StartNext < NumRemoteColGIDs) {
1386 if(PIDList_host[StartNext] == PIDList_host[StartNext-1]) {
1387 StartNext++;
1388 }
1389 else {
1390 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1391 StartCurrent = StartNext;
1392 StartNext++;
1393 }
1394 }
1395
1396 Kokkos::sort(ColIndices_view, NumLocalColGIDs + StartCurrent, NumLocalColGIDs + StartNext);
1397 }
1398
1399
1400 // Build permute array for *local* reindexing.
1401
1402 // Now fill front end. Two cases:
1403 //
1404 // (1) If the number of Local column GIDs is the same as the number
1405 // of Local domain GIDs, we can simply read the domain GIDs into
1406 // the front part of ColIndices, otherwise
1407 //
1408 // (2) We step through the GIDs of the domainMap, checking to see if
 1409 // each domain GID is a column GID. We want to do this to
1410 // maintain a consistent ordering of GIDs between the columns
1411 // and the domain.
1412 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1413 if (NumLocalColGIDs > 0) {
1414 // Load Global Indices into first numMyCols elements column GID list
1415 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i) {
1416 ColIndices_view[i] = domainMap_local.getGlobalElement(i);
1417 });
1418 }
1419 }
1420 else {
1421 // This part isn't actually tested in the unit tests
1422 LO NumLocalAgain = 0;
1423 Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space>(0, numDomainElements), KOKKOS_LAMBDA(const int i, LO& update, const bool final) {
1424 if(final && LocalGIDs_view[i]) {
1425 ColIndices_view[update] = domainMap_local.getGlobalElement(i);
1426 }
1427 if(LocalGIDs_view[i]) {
1428 update++;
1429 }
1430 }, NumLocalAgain);
1431
1432
1433 TEUCHOS_TEST_FOR_EXCEPTION(
1434 static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1435 std::runtime_error, prefix << "Local ID count test failed.");
1436 }
1437
1438 // Make column Map
1439 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1440
1441 colMap = rcp (new map_type (minus_one, ColIndices_view, domainMap.getIndexBase (),
1442 domainMap.getComm ()));
1443
1444 // Fill out colind_LID using local map
1445 auto localColMap = colMap->getLocalMap();
1446 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, colind_GID_view.size()), KOKKOS_LAMBDA(const int i) {
1447 colind_LID_view[i] = localColMap.getLocalElement(colind_GID_view[i]);
1448 });
1449}
1450
 1451// Generates a list of owning PIDs based on two transfers (aka import/export objects)
1452// Let:
1453// OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
1454// MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
1455// MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
1456// VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
1457// Precondition:
1458// 1) MapAo.isSameAs(*MapAm) - map compatibility between transfers
 1459// 2) VectorMap->isSameAs(*owningPIDs->getMap()) - map compatibility between transfer & vector
1460// 3) OwningMap->isOneToOne() - owning map is 1-to-1
1461// --- Precondition 3 is only checked in DEBUG mode ---
1462// Postcondition:
1463// owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
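// Implementation outline: fill a temporary integer vector defined on MapAo
// with each entry's owning rank (read from transferThatDefinesOwnership
// without communication), then migrate that vector onto VectorMap through
// transferThatDefinesMigration to produce owningPIDs.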
1464template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
1465void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
1466 bool useReverseModeForOwnership,
1467 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
1468 bool useReverseModeForMigration,
 1469 Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> & owningPIDs) {
 1470 typedef Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
 1471 typedef Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
 1472
1473 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > OwningMap = useReverseModeForOwnership ?
1474 transferThatDefinesOwnership.getTargetMap() :
1475 transferThatDefinesOwnership.getSourceMap();
1476 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAo = useReverseModeForOwnership ?
1477 transferThatDefinesOwnership.getSourceMap() :
1478 transferThatDefinesOwnership.getTargetMap();
1479 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAm = useReverseModeForMigration ?
1480 transferThatDefinesMigration.getTargetMap() :
1481 transferThatDefinesMigration.getSourceMap();
1482 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > VectorMap = useReverseModeForMigration ?
1483 transferThatDefinesMigration.getSourceMap() :
1484 transferThatDefinesMigration.getTargetMap();
1485
1486 TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
1487 TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
1488#ifdef HAVE_TPETRA_DEBUG
1489 TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
1490#endif
1491
1492 int rank = OwningMap->getComm()->getRank();
1493 // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
1494  // Note:  Due to the 1-to-1 requirement, several of these options throw
1495  Tpetra::Vector<int,LocalOrdinal,GlobalOrdinal,Node> temp(MapAo);
1496 const import_type* ownAsImport = dynamic_cast<const import_type*> (&transferThatDefinesOwnership);
1497 const export_type* ownAsExport = dynamic_cast<const export_type*> (&transferThatDefinesOwnership);
1498
1499 Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
1500 Teuchos::ArrayView<int> v_pids = pids();
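  // getPids with use_minus_one_for_local = false fills v_pids, ordered like the
  // Import's target Map (MapAo here), with the rank that owns each entry; locally
  // owned entries are labeled with this process's rank rather than -1.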
1501  if(ownAsImport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(true,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1502  else if(ownAsImport && !useReverseModeForOwnership) getPids(*ownAsImport,v_pids,false);
1503  else if(ownAsExport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(true,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");}
1504  else {TEUCHOS_TEST_FOR_EXCEPTION(true,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
1505
1506 const import_type* xferAsImport = dynamic_cast<const import_type*> (&transferThatDefinesMigration);
1507 const export_type* xferAsExport = dynamic_cast<const export_type*> (&transferThatDefinesMigration);
1508 TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
1509
1510 // Migrate from "A" vector to output vector
1511 owningPIDs.putScalar(rank);
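  // Applying an Import in reverse mode is done with doExport (and an Export in
  // reverse mode with doImport), so the four branches below cover forward and
  // reverse application of either transfer kind. REPLACE overwrites the locally
  // initialized rank wherever migrated ownership data arrives.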
1512 if(xferAsImport && useReverseModeForMigration) owningPIDs.doExport(temp,*xferAsImport,Tpetra::REPLACE);
1513 else if(xferAsImport && !useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsImport,Tpetra::REPLACE);
1514 else if(xferAsExport && useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsExport,Tpetra::REPLACE);
1515 else owningPIDs.doExport(temp,*xferAsExport,Tpetra::REPLACE);
1516
1517}
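// A minimal usage sketch (hedged and hypothetical: "ownershipImport" and
// "migrationImport" are caller-provided Import objects; forward mode is used
// for both, so ownershipImport's target Map must match migrationImport's
// source Map, and ownershipImport's source Map must be one-to-one):
//
//   Tpetra::Vector<int,LO,GO,Node> owningPIDs(migrationImport.getTargetMap());
//   Tpetra::Import_Util::getTwoTransferOwnershipVector(ownershipImport, false,
//                                                      migrationImport, false,
//                                                      owningPIDs);
//   // owningPIDs[k] now holds the rank that owns, in ownershipImport's source
//   // Map, the GID stored at local index k of migrationImport's target Map.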
1518
1519
1520
1521} // namespace Import_Util
1522} // namespace Tpetra
1523
1524#endif // TPETRA_IMPORT_UTIL2_HPP