Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_makeColMap_def.hpp
Go to the documentation of this file.
1#ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
2#define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
3
4// @HEADER
5// *****************************************************************************
6// Tpetra: Templated Linear Algebra Services Package
7//
8// Copyright 2008 NTESS and the Tpetra contributors.
9// SPDX-License-Identifier: BSD-3-Clause
10// *****************************************************************************
11// @HEADER
12
23
24#include "Tpetra_RowGraph.hpp"
25#include "Tpetra_Util.hpp"
26#include "Teuchos_Array.hpp"
27#include "Kokkos_Bitset.hpp"
28#include <set>
29#include <vector>
30
31namespace Tpetra {
32namespace Details {
33
34template <class LO, class GO, class NT>
35int
36makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
37 Teuchos::Array<int>& remotePIDs,
38 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
39 size_t numLocalColGIDs,
40 size_t numRemoteColGIDs,
41 std::set<GO>& RemoteGIDSet,
42 std::vector<GO>& RemoteGIDUnorderedVector,
43 std::vector<bool>& GIDisLocal,
44 const bool sortEachProcsGids,
45 std::ostream* errStrm)
46{
47 using Teuchos::Array;
48 using Teuchos::ArrayView;
49 using Teuchos::rcp;
50 using std::endl;
51 int errCode = 0;
52 const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
53 typedef ::Tpetra::Map<LO, GO, NT> map_type;
54 // Possible short-circuit for serial scenario:
55 //
56 // If all domain GIDs are present as column indices, then set
57 // ColMap=DomainMap. By construction, LocalGIDs is a subset of
58 // DomainGIDs.
59 //
60 // If we have
61 // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
62 // and
63 // * Number of local GIDs is number of domain GIDs
64 // then
65 // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
66 // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
67 // on the calling process.
68 //
69 // We will concern ourselves only with the special case of a
70 // serial DomainMap, obviating the need for communication.
71 //
72 // If
73 // * DomainMap has a serial communicator
74 // then we can set the column Map as the domain Map
75 // return. Benefit: this graph won't need an Import object
76 // later.
77 //
78 // Note, for a serial domain map, there can be no RemoteGIDs,
79 // because there are no remote processes. Likely explanations
80 // for this are:
81 // * user submitted erroneous column indices
82 // * user submitted erroneous domain Map
83 if (domMap->getComm ()->getSize () == 1) {
84 if (numRemoteColGIDs != 0) {
85 errCode = -2;
86 if (errStrm != NULL) {
87 *errStrm << prefix << "The domain Map only has one process, but "
88 << numRemoteColGIDs << " column "
89 << (numRemoteColGIDs != 1 ? "indices are" : "index is")
90 << " not in the domain Map. Either these indices are "
91 "invalid or the domain Map is invalid. Remember that nonsquare "
92 "matrices, or matrices where the row and range Maps differ, "
93 "require calling the version of fillComplete that takes the "
94 "domain and range Maps as input." << endl;
95 }
96 }
97 if (numLocalColGIDs == domMap->getLocalNumElements ()) {
98 colMap = domMap; // shallow copy
99 return errCode;
100 }
101 }
102
103 // Populate myColumns with a list of all column GIDs. Put
104 // locally owned (in the domain Map) GIDs at the front: they
105 // correspond to "same" and "permuted" entries between the
106 // column Map and the domain Map. Put remote GIDs at the back.
107 Array<GO> myColumns(numLocalColGIDs + numRemoteColGIDs);
108 // get pointers into myColumns for each part
109 ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs);
110 ArrayView<GO> remoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs);
111
112 // Copy the remote GIDs into myColumns
113 if (sortEachProcsGids) {
114 // The std::set puts GIDs in increasing order.
115 std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(),
116 remoteColGIDs.begin());
117 }
118 else {
119 // Respect the originally encountered order.
120 std::copy (RemoteGIDUnorderedVector.begin(),
121 RemoteGIDUnorderedVector.end(), remoteColGIDs.begin());
122 }
123
124 // Make a list of process ranks corresponding to the remote GIDs.
125 // remotePIDs is an output argument of getRemoteIndexList below;
126 // its initial contents don't matter.
127 if (static_cast<size_t> (remotePIDs.size ()) != numRemoteColGIDs) {
128 remotePIDs.resize (numRemoteColGIDs);
129 }
130 // Look up the remote process' ranks in the domain Map.
131 {
132 const LookupStatus stat =
133 domMap->getRemoteIndexList (remoteColGIDs, remotePIDs ());
134
135 // If any process returns IDNotPresent, then at least one of
136 // the remote indices was not present in the domain Map. This
137 // means that the Import object cannot be constructed, because
138 // of incongruity between the column Map and domain Map.
139 // This has two likely causes:
140 // - The user has made a mistake in the column indices
141 // - The user has made a mistake with respect to the domain Map
142 if (stat == IDNotPresent) {
143 if (errStrm != NULL) {
144 *errStrm << prefix << "Some column indices are not in the domain Map."
145 "Either these column indices are invalid or the domain Map is "
146 "invalid. Likely cause: For a nonsquare matrix, you must give the "
147 "domain and range Maps as input to fillComplete." << endl;
148 }
149 // Don't return yet, because not all processes may have
150 // encountered this error state. This function ends with an
151 // all-reduce, so we have to make sure that everybody gets to
152 // that point. The resulting Map may be wrong, but at least
153 // nothing should crash.
154 errCode = -3;
155 }
156 }
157
158 // Sort incoming remote column indices by their owning process
159 // rank, so that all columns coming from a given remote process
160 // are contiguous. This means the Import's Distributor doesn't
161 // need to reorder data.
162 //
163 // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
164 // it respects either of the possible orderings of GIDs (sorted,
165 // or original order) specified above.
166 sort2 (remotePIDs.begin (), remotePIDs.end (), remoteColGIDs.begin (), true);
167
168 // Copy the local GIDs into myColumns. Two cases:
169 // 1. If the number of Local column GIDs is the same as the number
170 // of Local domain GIDs, we can simply read the domain GIDs
171 // into the front part of ColIndices (see logic above from the
172 // serial short circuit case)
173 // 2. We step through the GIDs of the DomainMap, checking to see
174 // if each domain GID is a column GID. We want to do this to
175 // maintain a consistent ordering of GIDs between the columns
176 // and the domain.
177
178 const size_t numDomainElts = domMap->getLocalNumElements ();
179 if (numLocalColGIDs == numDomainElts) {
180 // If the number of locally owned GIDs are the same as the
181 // number of local domain Map elements, then the local domain
182 // Map elements are the same as the locally owned GIDs.
183 if (domMap->isContiguous ()) {
184 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
185 // the domain Map is contiguous, it's more efficient to avoid
186 // calling getLocalElementList(), since that permanently
187 // constructs and caches the GID list in the contiguous Map.
188 GO curColMapGid = domMap->getMinGlobalIndex ();
189 for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
190 LocalColGIDs[k] = curColMapGid;
191 }
192 }
193 else {
194 ArrayView<const GO> domainElts = domMap->getLocalElementList ();
195 std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
196 }
197 }
198 else {
199 // Count the number of locally owned GIDs, both to keep track of
200 // the current array index, and as a sanity check.
201 size_t numLocalCount = 0;
202 if (domMap->isContiguous ()) {
203 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
204 // the domain Map is contiguous, it's more efficient to avoid
205 // calling getLocalElementList(), since that permanently
206 // constructs and caches the GID list in the contiguous Map.
207 GO curColMapGid = domMap->getMinGlobalIndex ();
208 for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
209 if (GIDisLocal[i]) {
210 LocalColGIDs[numLocalCount++] = curColMapGid;
211 }
212 }
213 }
214 else {
215 ArrayView<const GO> domainElts = domMap->getLocalElementList ();
216 for (size_t i = 0; i < numDomainElts; ++i) {
217 if (GIDisLocal[i]) {
218 LocalColGIDs[numLocalCount++] = domainElts[i];
219 }
220 }
221 }
222 if (numLocalCount != numLocalColGIDs) {
223 if (errStrm != NULL) {
224 *errStrm << prefix << "numLocalCount = " << numLocalCount
225 << " != numLocalColGIDs = " << numLocalColGIDs
226 << ". This should never happen. "
227 "Please report this bug to the Tpetra developers." << endl;
228 }
229 // Don't return yet, because not all processes may have
230 // encountered this error state. This function ends with an
231 // all-reduce, so we have to make sure that everybody gets to
232 // that point.
233 errCode = -4;
234 }
235 }
236
237 // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
238 // information we collected above to construct the Import. In
239 // particular, building an Import requires:
240 //
241 // 1. numSameIDs (length of initial contiguous sequence of GIDs
242 // on this process that are the same in both Maps; this
243 // equals the number of domain Map elements on this process)
244 //
245 // 2. permuteToLIDs and permuteFromLIDs (both empty in this
246 // case, since there's no permutation going on; the column
247 // Map starts with the domain Map's GIDs, and immediately
248 // after them come the remote GIDs)
249 //
250 // 3. remoteGIDs (exactly those GIDs that we found out above
251 // were not in the domain Map) and remoteLIDs (which we could
252 // have gotten above by using the three-argument version of
253 // getRemoteIndexList() that computes local indices as well
254 // as process ranks, instead of the two-argument version that
255 // was used above)
256 //
257 // 4. remotePIDs (which we have from the getRemoteIndexList() call
258 // above) -- NOTE (mfh 14 Sep 2017) Fix for Trilinos GitHub
259 // Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
260 // addresses this. remotePIDs is now an output argument of
261 // both this function and CrsGraph::makeColMap, and
262 // CrsGraph::makeImportExport now has an option to use that
263 // information, if available (that is, if makeColMap was
264 // actually called, which it would not be if the graph already
265 // had a column Map).
266 //
267 // 5. Apply the permutation from sorting remotePIDs to both
268 // remoteGIDs and remoteLIDs (by calling sort3 above instead of
269 // sort2), instead of just to remoteLIDs alone.
270 //
271 // 6. Everything after the sort3 call in Import::setupExport():
272 // a. Create the Distributor via createFromRecvs(), which
273 // computes exportGIDs and exportPIDs
274 // b. Compute exportLIDs from exportGIDs (by asking the
275 // source Map, in this case the domain Map, to convert
276 // global to local)
277 //
278 // Steps 1-5 come for free, since we must do that work anyway in
279 // order to compute the column Map. In particular, Step 3 is
280 // even more expensive than Step 6a, since it involves both
281 // creating and using a new Distributor object.
282 const global_size_t INV =
283 Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
284 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
285 // be the same as the Map's min GID? If the first column is empty
286 // (contains no entries), then the column Map's min GID won't
287 // necessarily be the same as the domain Map's index base.
288 const GO indexBase = domMap->getIndexBase ();
289 colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
290 return errCode;
291}
292
293template <class LO, class GO, class NT>
294int
295makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& colMap,
296 Teuchos::Array<int>& remotePIDs,
297 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& domMap,
298 const RowGraph<LO, GO, NT>& graph,
299 const bool sortEachProcsGids,
300 std::ostream* errStrm)
301{
302 using Teuchos::Array;
303 using Teuchos::ArrayView;
304 using Teuchos::rcp;
305 using std::endl;
306 typedef ::Tpetra::Map<LO, GO, NT> map_type;
307 const char prefix[] = "Tpetra::Details::makeColMap: ";
308 int errCode = 0;
309
310 // If the input domain Map or its communicator is null on the
311 // calling process, then the calling process does not participate in
312 // the returned column Map. Thus, we can set the returned column
313 // Map to null on those processes, and return immediately. This is
314 // not an error condition, as long as when domMap and its
315 // communicator are NOT null, the graph's other Maps and
316 // communicator are not also null.
317 if (domMap.is_null () || domMap->getComm ().is_null ()) {
318 colMap = Teuchos::null;
319 return errCode;
320 }
321
322 Array<GO> myColumns;
323 if (graph.isLocallyIndexed ()) {
324 colMap = graph.getColMap ();
325 // If the graph is locally indexed, it had better have a column Map.
326 // The extra check for ! graph.hasColMap() is conservative.
327 if (colMap.is_null () || ! graph.hasColMap ()) {
328 errCode = -1;
329 if (errStrm != NULL) {
330 *errStrm << prefix << "The graph is locally indexed on the calling "
331 "process, but has no column Map (either getColMap() returns null, "
332 "or hasColMap() returns false)." << endl;
333 }
334 // Under this error condition, this process will not fill
335 // myColumns. The resulting new column Map will be incorrect,
336 // but at least we won't hang, and this process will report the
337 // error.
338 }
339 else {
340 // The graph already has a column Map, and is locally indexed on
341 // the calling process. However, it may be globally indexed (or
342 // neither locally nor globally indexed) on other processes.
343 // Assume that we want to recreate the column Map.
344 if (colMap->isContiguous ()) {
345 // The number of indices on each process must fit in LO.
346 const LO numCurGids = static_cast<LO> (colMap->getLocalNumElements ());
347 myColumns.resize (numCurGids);
348 const GO myFirstGblInd = colMap->getMinGlobalIndex ();
349 for (LO k = 0; k < numCurGids; ++k) {
350 myColumns[k] = myFirstGblInd + static_cast<GO> (k);
351 }
352 }
353 else { // the column Map is NOT contiguous
354 ArrayView<const GO> curGids = graph.getColMap ()->getLocalElementList ();
355 // The number of indices on each process must fit in LO.
356 const LO numCurGids = static_cast<LO> (curGids.size ());
357 myColumns.resize (numCurGids);
358 for (LO k = 0; k < numCurGids; ++k) {
359 myColumns[k] = curGids[k];
360 }
361 } // whether the graph's current column Map is contiguous
362 } // does the graph currently have a column Map?
363 }
364 else if (graph.isGloballyIndexed ()) {
365 // Go through all the rows, finding the populated column indices.
366 //
367 // Our final list of indices for the column Map constructor will
368 // have the following properties (all of which are with respect to
369 // the calling process):
370 //
371 // 1. Indices in the domain Map go first.
372 // 2. Indices not in the domain Map follow, ordered first
373 // contiguously by their owning process rank (in the domain
374 // Map), then in increasing order within that.
375 // 3. No duplicate indices.
376 //
377 // This imitates the ordering used by Aztec(OO) and Epetra.
378 // Storing indices owned by the same process (in the domain Map)
379 // contiguously permits the use of contiguous send and receive
380 // buffers.
381 //
382 // We begin by partitioning the column indices into "local" GIDs
383 // (owned by the domain Map) and "remote" GIDs (not owned by the
384 // domain Map). We use the same order for local GIDs as the
385 // domain Map, so we track them in place in their array. We use
386 // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
387 // that we don't have to merge duplicates later.
388 const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid ();
389 size_t numLocalColGIDs = 0;
390 size_t numRemoteColGIDs = 0;
391
392 // GIDisLocal[lid] is false if and only if local index lid in the
393 // domain Map is remote (not local).
394 std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
395 std::set<GO> RemoteGIDSet;
396 // This preserves the not-sorted Epetra order of GIDs.
397 // We only use this if sortEachProcsGids is false.
398 std::vector<GO> RemoteGIDUnorderedVector;
399
400 if (! graph.getRowMap ().is_null ()) {
401 const Tpetra::Map<LO, GO, NT>& rowMap = * (graph.getRowMap ());
402 const LO lclNumRows = rowMap.getLocalNumElements ();
403 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
404 const GO gblRow = rowMap.getGlobalElement (lclRow);
405 typename RowGraph<LO,GO,NT>::global_inds_host_view_type rowGids;
406 graph.getGlobalRowView (gblRow, rowGids);
407
408 const LO numEnt = static_cast<LO> (rowGids.size ());
409 if (numEnt != 0) {
410 for (LO k = 0; k < numEnt; ++k) {
411 const GO gid = rowGids[k];
412 const LO lid = domMap->getLocalElement (gid);
413 if (lid != LINV) {
414 const bool alreadyFound = GIDisLocal[lid];
415 if (! alreadyFound) {
416 GIDisLocal[lid] = true;
417 ++numLocalColGIDs;
418 }
419 }
420 else {
421 const bool notAlreadyFound = RemoteGIDSet.insert (gid).second;
422 if (notAlreadyFound) { // gid did not exist in the set before
423 if (! sortEachProcsGids) {
424 // The user doesn't want to sort remote GIDs (for each
425 // remote process); they want us to keep remote GIDs
426 // in their original order. We do this by stuffing
427 // each remote GID into an array as we encounter it
428 // for the first time. The std::set helpfully tracks
429 // first encounters.
430 RemoteGIDUnorderedVector.push_back (gid);
431 }
432 ++numRemoteColGIDs;
433 }
434 }
435 } // for each entry k in row r
436 } // if row r contains > 0 entries
437 } // for each locally owned row r
438 } // if the graph has a nonnull row Map
439
440 return makeColMapImpl<LO, GO, NT>(
441 colMap, remotePIDs,
442 domMap,
443 numLocalColGIDs, numRemoteColGIDs,
444 RemoteGIDSet, RemoteGIDUnorderedVector, GIDisLocal,
445 sortEachProcsGids, errStrm);
446
447 } // if the graph is globally indexed
448 else {
449 // If we reach this point, the graph is neither locally nor
450 // globally indexed. Thus, the graph is empty on this process
451 // (per the usual legacy Petra convention), so myColumns will be
452 // left empty.
453 ; // do nothing
454 }
455
456 const global_size_t INV =
457 Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
458 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
459 // be the same as the Map's min GID? If the first column is empty
460 // (contains no entries), then the column Map's min GID won't
461 // necessarily be the same as the domain Map's index base.
462 const GO indexBase = domMap->getIndexBase ();
463 colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
464 return errCode;
465}
466
467template<typename GOView, typename bitset_t>
468struct GatherPresentEntries
469{
470 using GO = typename GOView::non_const_value_type;
471
472 GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
473 : minGID(minGID_), gids(gids_), present(present_)
474 {}
475
476 KOKKOS_INLINE_FUNCTION void operator()(const GO i) const
477 {
478 present.set(gids(i) - minGID);
479 }
480
481 GO minGID;
482 GOView gids;
483 bitset_t present;
484};
485
486template<typename LO, typename GO, typename device_t, typename LocalMapType, typename const_bitset_t, bool doingRemotes>
487struct ListGIDs
488{
489 using mem_space = typename device_t::memory_space;
490 using GOView = Kokkos::View<GO*, mem_space>;
491 using SingleView = Kokkos::View<GO, mem_space>;
492
493 ListGIDs(GO minGID_, GOView& gidList_, SingleView& numElems_, const_bitset_t& present_, const LocalMapType& localDomainMap_)
494 : minGID(minGID_), gidList(gidList_), numElems(numElems_), present(present_), localDomainMap(localDomainMap_)
495 {}
496
497 KOKKOS_INLINE_FUNCTION void operator()(const GO i, GO& lcount, const bool finalPass) const
498 {
499 bool isRemote = localDomainMap.getLocalElement(i + minGID) == ::Tpetra::Details::OrdinalTraits<LO>::invalid();
500 if(present.test(i) && doingRemotes == isRemote)
501 {
502 if(finalPass)
503 {
504 //lcount is the index where this GID should be inserted in gidList.
505 gidList(lcount) = minGID + i;
506 }
507 lcount++;
508 }
509 if((i == static_cast<GO>(present.size() - 1)) && finalPass)
510 {
511 //Set the number of inserted indices in a single-element view
512 numElems() = lcount;
513 }
514 }
515
516 GO minGID;
517 GOView gidList;
518 SingleView numElems;
519 const_bitset_t present;
520 const LocalMapType localDomainMap;
521};
522
523template<typename GO, typename mem_space>
524struct MinMaxReduceFunctor
525{
526 using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
527 using GOView = Kokkos::View<GO*, mem_space>;
528
529 MinMaxReduceFunctor(const GOView& gids_)
530 : gids(gids_)
531 {}
532
533 KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const
534 {
535 GO gid = gids(i);
536 if(gid < lminmax.min_val)
537 lminmax.min_val = gid;
538 if(gid > lminmax.max_val)
539 lminmax.max_val = gid;
540 };
541
542 const GOView gids;
543};
544
545template <class LO, class GO, class NT>
546int
547makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
548 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
549 Kokkos::View<GO*, typename NT::memory_space> gids,
550 std::ostream* errStrm)
551{
552 using Teuchos::RCP;
553 using Teuchos::Array;
554 using Kokkos::RangePolicy;
555 using device_t = typename NT::device_type;
556 using exec_space = typename device_t::execution_space;
557 using memory_space = typename device_t::memory_space;
558 // Note BMK 5-2021: this is deliberately not just device_t.
559 // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
560 // use the default memory space for HIP (HIPSpace). Using the default mem
561 // space is fine for all other backends too. This bitset type is only used
562 // in this function so it won't cause type mismatches.
563 using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
564 using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
565 using GOView = Kokkos::View<GO*, memory_space>;
566 using SingleView = Kokkos::View<GO, memory_space>;
568 using LocalMap = typename map_type::local_map_type;
569 GO nentries = gids.extent(0);
570 GO minGID = Teuchos::OrdinalTraits<GO>::max();
571 GO maxGID = 0;
572 using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
573 MinMaxValue minMaxGID = {minGID, maxGID};
574 Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
575 MinMaxReduceFunctor<GO, memory_space>(gids),
576 Kokkos::MinMax<GO>(minMaxGID));
577 minGID = minMaxGID.min_val; maxGID = minMaxGID.max_val;
578 //Now, know the full range of input GIDs.
579 //Determine the set of GIDs in the column map using a dense bitset, which corresponds to the range [minGID, maxGID]
580 bitset_t presentGIDs(maxGID - minGID + 1);
581 Kokkos::parallel_for(RangePolicy<exec_space>(0, nentries), GatherPresentEntries<GOView, bitset_t>(minGID, gids, presentGIDs));
582 const_bitset_t constPresentGIDs(presentGIDs);
583 //Get the set of local and remote GIDs on device
584 SingleView numLocals("Num local GIDs");
585 SingleView numRemotes("Num remote GIDs");
586 GOView localGIDView(Kokkos::ViewAllocateWithoutInitializing("Local GIDs"), constPresentGIDs.count());
587 GOView remoteGIDView(Kokkos::ViewAllocateWithoutInitializing("Remote GIDs"), constPresentGIDs.count());
588 LocalMap localDomMap = domMap->getLocalMap();
589 //This lists the locally owned GIDs in localGIDView
590 Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
591 ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, false>
592 (minGID, localGIDView, numLocals, constPresentGIDs, localDomMap));
593 //And this lists the remote GIDs in remoteGIDView
594 Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
595 ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, true>
596 (minGID, remoteGIDView, numRemotes, constPresentGIDs, localDomMap));
597 //Pull down the sizes
598 GO numLocalColGIDs = 0;
599 GO numRemoteColGIDs = 0;
600 // DEEP_COPY REVIEW - DEVICE-TO-VALUE
601 Kokkos::deep_copy(exec_space(), numLocalColGIDs, numLocals);
602 // DEEP_COPY REVIEW - DEVICE-TO-numLocalColGIDs
603 Kokkos::deep_copy(exec_space(), numRemoteColGIDs, numRemotes);
604 //Pull down the remote lists
605 auto localsHost = Kokkos::create_mirror_view(localGIDView);
606 auto remotesHost = Kokkos::create_mirror_view(remoteGIDView);
607 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
608 Kokkos::deep_copy(exec_space(), localsHost, localGIDView);
609 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
610 Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView);
611 // CAG: This fence was found to be required on Cuda with UVM=on.
612 Kokkos::fence("Tpetra::makeColMap");
613 //Finally, populate the STL structures which hold the index lists
614 std::set<GO> RemoteGIDSet;
615 std::vector<GO> RemoteGIDUnorderedVector;
616 std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
617 for(GO i = 0; i < numLocalColGIDs; i++)
618 {
619 GO gid = localsHost(i);
620 //Already know that gid is locally owned, so this index will never be invalid().
621 //makeColMapImpl uses this and the domain map to get the the local GID list.
622 GIDisLocal[domMap->getLocalElement(gid)] = true;
623 }
624 RemoteGIDUnorderedVector.reserve(numRemoteColGIDs);
625 for(GO i = 0; i < numRemoteColGIDs; i++)
626 {
627 GO gid = remotesHost(i);
628 RemoteGIDSet.insert(gid);
629 RemoteGIDUnorderedVector.push_back(gid);
630 }
631 //remotePIDs will be discarded in this version of makeColMap
632 Array<int> remotePIDs;
633 //Find the min and max GID
634 return makeColMapImpl<LO, GO, NT>(
635 colMap,
636 remotePIDs,
637 domMap,
638 static_cast<size_t>(numLocalColGIDs),
639 static_cast<size_t>(numRemoteColGIDs),
640 RemoteGIDSet,
641 RemoteGIDUnorderedVector,
642 GIDisLocal,
643 true, //always sort remotes
644 errStrm);
645}
646
647} // namespace Details
648} // namespace Tpetra
649
650//
651// Explicit instantiation macros
652//
653// Must be expanded from within the Tpetra namespace!
654//
655#define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO,GO,NT) \
656 namespace Details { \
657 template int \
658 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
659 Teuchos::Array<int>&, \
660 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
661 const RowGraph<LO, GO, NT>&, \
662 const bool, \
663 std::ostream*); \
664 template int \
665 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
666 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
667 Kokkos::View<GO*, typename NT::memory_space>, \
668 std::ostream*); \
669 }
670
671#endif // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
Stand-alone utility functions and macros.
"Local" part of Map suitable for Kokkos kernels.
A parallel distribution of indices over processes.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
An abstract interface for graphs accessed by rows.
virtual bool hasColMap() const =0
Whether the graph has a well-defined column Map.
virtual bool isGloballyIndexed() const =0
If graph indices are in the global range, this function returns true. Otherwise, this function return...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes this graph's distribution of rows over processes.
virtual void getGlobalRowView(const GlobalOrdinal gblRow, global_inds_host_view_type &gblColInds) const =0
Get a const, non-persisting view of the given global row's global column indices, as a Teuchos::Array...
virtual bool isLocallyIndexed() const =0
If graph indices are in the local range, this function returns true. Otherwise, this function returns...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getColMap() const =0
The Map that describes this graph's distribution of columns over processes.
Nonmember function that computes a residual Computes R = B - A * X.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t global_size_t
Global size_t object.