Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Map_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
14
15#ifndef TPETRA_MAP_DEF_HPP
16#define TPETRA_MAP_DEF_HPP
17
18#include <memory>
19#include <sstream>
20#include <stdexcept>
21#include <typeinfo>
22
23#include "Teuchos_as.hpp"
24#include "Teuchos_TypeNameTraits.hpp"
25#include "Teuchos_CommHelpers.hpp"
26
27#include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
29#include "Tpetra_Details_FixedHashTable.hpp"
32#include "Tpetra_Core.hpp"
33#include "Tpetra_Util.hpp"
34#include "Tpetra_Details_mpiIsInitialized.hpp"
35#include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
38
39namespace { // (anonymous)
40
41 void
42 checkMapInputArray (const char ctorName[],
43 const void* indexList,
44 const size_t indexListSize,
45 const Teuchos::Comm<int>* const comm)
46 {
48
49 const bool debug = Behavior::debug("Map");
50 if (debug) {
51 using Teuchos::outArg;
52 using Teuchos::REDUCE_MIN;
53 using Teuchos::reduceAll;
54 using std::endl;
55
56 const int myRank = comm == nullptr ? 0 : comm->getRank ();
57 const bool verbose = Behavior::verbose("Map");
58 std::ostringstream lclErrStrm;
59 int lclSuccess = 1;
60
61 if (indexListSize != 0 && indexList == nullptr) {
62 lclSuccess = 0;
63 if (verbose) {
64 lclErrStrm << "Proc " << myRank << ": indexList is null, "
65 "but indexListSize=" << indexListSize << " != 0." << endl;
66 }
67 }
68 int gblSuccess = 0; // output argument
69 reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
70 if (gblSuccess != 1) {
71 std::ostringstream gblErrStrm;
72 gblErrStrm << "Tpetra::Map constructor " << ctorName <<
73 " detected a problem with the input array "
74 "(raw array, Teuchos::ArrayView, or Kokkos::View) "
75 "of global indices." << endl;
76 if (verbose) {
78 gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
79 }
80 TEUCHOS_TEST_FOR_EXCEPTION
81 (true, std::invalid_argument, gblErrStrm.str ());
82 }
83 }
84 }
85
86
87
88
89 template <class LocalOrdinal, class GlobalOrdinal, class ViewType>
90 void computeConstantsOnDevice(const ViewType& entryList, GlobalOrdinal & minMyGID, GlobalOrdinal & maxMyGID, GlobalOrdinal &firstContiguousGID, GlobalOrdinal &lastContiguousGID_val, LocalOrdinal &lastContiguousGID_loc) {
91 using LO = LocalOrdinal;
92 using GO = GlobalOrdinal;
93 using exec_space = typename ViewType::device_type::execution_space;
94 using range_policy = Kokkos::RangePolicy<exec_space, Kokkos::IndexType<LO> >;
95 const LO numLocalElements = entryList.extent(0);
96
97 // We're going to use the minloc backwards because we need to have it sort on the "location" and have the "value" along for the
98 // ride, rather than the other way around
99 typedef typename Kokkos::MinLoc<LO,GO>::value_type minloc_type;
100 minloc_type myMinLoc;
101
102 // Find the initial sequence of parallel gids
103 // To find the lastContiguousGID_, we find the first guy where entryList[i] - entryList[0] != i-0. That's the first non-contiguous guy.
104 // We want the one *before* that guy.
105 Kokkos::parallel_reduce(range_policy(0,numLocalElements),KOKKOS_LAMBDA(const LO & i, GO &l_myMin, GO&l_myMax, GO& l_firstCont, minloc_type & l_lastCont){
106 GO entry_0 = entryList[0];
107 GO entry_i = entryList[i];
108
109 // Easy stuff
110 l_myMin = (l_myMin < entry_i) ? l_myMin : entry_i;
111 l_myMax = (l_myMax > entry_i) ? l_myMax : entry_i;
112 l_firstCont = entry_0;
113
114 if(entry_i - entry_0 != i && l_lastCont.val >= i) {
115 // We're non-contiguous, so the guy before us could be the last contiguous guy
116 l_lastCont.val = i-1;
117 l_lastCont.loc = entryList[i-1];
118 }
119 else if (i == numLocalElements-1 && i < l_lastCont.val) {
120 // If we're last, we always think we're the last contiguous guy, unless someone non-contiguous is already here
121 l_lastCont.val = i;
122 l_lastCont.loc = entry_i;
123 }
124
125 },Kokkos::Min<GO>(minMyGID),Kokkos::Max<GO>(maxMyGID),Kokkos::Min<GO>(firstContiguousGID),Kokkos::MinLoc<LO,GO>(myMinLoc));
126
127 // This switch is intentional, since we're using MinLoc backwards
128 lastContiguousGID_val = myMinLoc.loc;
129 lastContiguousGID_loc = myMinLoc.val;
130 }
131
132
133} // namespace (anonymous)
134
135namespace Tpetra {
136
137 template <class LocalOrdinal, class GlobalOrdinal, class Node>
139 Map () :
140 comm_ (new Teuchos::SerialComm<int> ()),
141 indexBase_ (0),
142 numGlobalElements_ (0),
143 numLocalElements_ (0),
144 minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
145 maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
146 minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
147 maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
148 firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
149 lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
150 uniform_ (false), // trivially
151 contiguous_ (false),
152 distributed_ (false), // no communicator yet
153 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
154 {
157 }
158
159
160 template <class LocalOrdinal, class GlobalOrdinal, class Node>
162 Map (const global_size_t numGlobalElements,
163 const global_ordinal_type indexBase,
164 const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
165 const LocalGlobal lOrG) :
166 comm_ (comm),
167 uniform_ (true),
168 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
169 {
170 using Teuchos::as;
171 using Teuchos::broadcast;
172 using Teuchos::outArg;
173 using Teuchos::reduceAll;
174 using Teuchos::REDUCE_MIN;
175 using Teuchos::REDUCE_MAX;
176 using Teuchos::typeName;
177 using std::endl;
178 using GO = global_ordinal_type;
179 using GST = global_size_t;
180 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
181 const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
182 const char exPfx[] =
183 "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
184
185 const bool debug = Details::Behavior::debug("Map");
186 const bool verbose = Details::Behavior::verbose("Map");
187 std::unique_ptr<std::string> prefix;
188 if (verbose) {
189 prefix = Details::createPrefix(
190 comm_.getRawPtr(), "Map", funcName);
191 std::ostringstream os;
192 os << *prefix << "Start" << endl;
193 std::cerr << os.str();
194 }
197
198 // In debug mode only, check whether numGlobalElements and
199 // indexBase are the same over all processes in the communicator.
200 if (debug) {
201 GST proc0NumGlobalElements = numGlobalElements;
202 broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
203 GST minNumGlobalElements = numGlobalElements;
204 GST maxNumGlobalElements = numGlobalElements;
205 reduceAll(*comm, REDUCE_MIN, numGlobalElements,
206 outArg(minNumGlobalElements));
207 reduceAll(*comm, REDUCE_MAX, numGlobalElements,
208 outArg(maxNumGlobalElements));
209 TEUCHOS_TEST_FOR_EXCEPTION
210 (minNumGlobalElements != maxNumGlobalElements ||
211 numGlobalElements != minNumGlobalElements,
212 std::invalid_argument, exPfx << "All processes must "
213 "provide the same number of global elements. Process 0 set "
214 "numGlobalElements="<< proc0NumGlobalElements << ". The "
215 "calling process " << comm->getRank() << " set "
216 "numGlobalElements=" << numGlobalElements << ". The min "
217 "and max values over all processes are "
218 << minNumGlobalElements << " resp. " << maxNumGlobalElements
219 << ".");
220
221 GO proc0IndexBase = indexBase;
222 broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
223 GO minIndexBase = indexBase;
224 GO maxIndexBase = indexBase;
225 reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
226 reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
227 TEUCHOS_TEST_FOR_EXCEPTION
228 (minIndexBase != maxIndexBase || indexBase != minIndexBase,
229 std::invalid_argument, exPfx << "All processes must "
230 "provide the same indexBase argument. Process 0 set "
231 "indexBase=" << proc0IndexBase << ". The calling process "
232 << comm->getRank() << " set indexBase=" << indexBase
233 << ". The min and max values over all processes are "
234 << minIndexBase << " resp. " << maxIndexBase << ".");
235 }
236
237 // Distribute the elements across the processes in the given
238 // communicator so that global IDs (GIDs) are
239 //
240 // - Nonoverlapping (only one process owns each GID)
241 // - Contiguous (the sequence of GIDs is nondecreasing, and no two
242 // adjacent GIDs differ by more than one)
243 // - As evenly distributed as possible (the numbers of GIDs on two
244 // different processes do not differ by more than one)
245
246 // All processes have the same numGlobalElements, but we still
247 // need to check that it is valid. numGlobalElements must be
248 // positive and not the "invalid" value (GSTI).
249 //
250 // This comparison looks funny, but it avoids compiler warnings
251 // for comparing unsigned integers (numGlobalElements_in is a
252 // GST, which is unsigned) while still working if we
253 // later decide to make GST signed.
254 TEUCHOS_TEST_FOR_EXCEPTION(
255 (numGlobalElements < 1 && numGlobalElements != 0),
256 std::invalid_argument, exPfx << "numGlobalElements (= "
257 << numGlobalElements << ") must be nonnegative.");
258
259 TEUCHOS_TEST_FOR_EXCEPTION
260 (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
261 "You provided numGlobalElements = Teuchos::OrdinalTraits<"
262 "Tpetra::global_size_t>::invalid(). This version of the "
263 "constructor requires a valid value of numGlobalElements. "
264 "You probably mistook this constructor for the \"contiguous "
265 "nonuniform\" constructor, which can compute the global "
266 "number of elements for you if you set numGlobalElements to "
267 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
268
269 size_t numLocalElements = 0; // will set below
270 if (lOrG == GloballyDistributed) {
271 // Compute numLocalElements:
272 //
273 // If numGlobalElements == numProcs * B + remainder,
274 // then Proc r gets B+1 elements if r < remainder,
275 // and B elements if r >= remainder.
276 //
277 // This strategy is valid for any value of numGlobalElements and
278 // numProcs, including the following border cases:
279 // - numProcs == 1
280 // - numLocalElements < numProcs
281 //
282 // In the former case, remainder == 0 && numGlobalElements ==
283 // numLocalElements. In the latter case, remainder ==
284 // numGlobalElements && numLocalElements is either 0 or 1.
285 const GST numProcs = static_cast<GST> (comm_->getSize ());
286 const GST myRank = static_cast<GST> (comm_->getRank ());
287 const GST quotient = numGlobalElements / numProcs;
288 const GST remainder = numGlobalElements - quotient * numProcs;
289
290 GO startIndex;
291 if (myRank < remainder) {
292 numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
293 // myRank was originally an int, so it should never overflow
294 // reasonable GO types.
295 startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
296 } else {
297 numLocalElements = as<size_t> (quotient);
298 startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
299 as<GO> (remainder);
300 }
301
302 minMyGID_ = indexBase + startIndex;
303 maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
304 minAllGID_ = indexBase;
305 maxAllGID_ = indexBase + numGlobalElements - 1;
306 distributed_ = (numProcs > 1);
307 }
308 else { // lOrG == LocallyReplicated
309 numLocalElements = as<size_t> (numGlobalElements);
310 minMyGID_ = indexBase;
311 maxMyGID_ = indexBase + numGlobalElements - 1;
312 distributed_ = false;
313 }
314
315 minAllGID_ = indexBase;
316 maxAllGID_ = indexBase + numGlobalElements - 1;
317 indexBase_ = indexBase;
318 numGlobalElements_ = numGlobalElements;
319 numLocalElements_ = numLocalElements;
320 firstContiguousGID_ = minMyGID_;
321 lastContiguousGID_ = maxMyGID_;
322 contiguous_ = true;
323
324 // Create the Directory on demand in getRemoteIndexList().
325 //setupDirectory ();
326
327 if (verbose) {
328 std::ostringstream os;
329 os << *prefix << "Done" << endl;
330 std::cerr << os.str();
331 }
332 }
333
334
335 template <class LocalOrdinal, class GlobalOrdinal, class Node>
337 Map (const global_size_t numGlobalElements,
338 const size_t numLocalElements,
339 const global_ordinal_type indexBase,
340 const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
341 comm_ (comm),
342 uniform_ (false),
343 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
344 {
345 using Teuchos::as;
346 using Teuchos::broadcast;
347 using Teuchos::outArg;
348 using Teuchos::reduceAll;
349 using Teuchos::REDUCE_MIN;
350 using Teuchos::REDUCE_MAX;
351 using Teuchos::REDUCE_SUM;
352 using Teuchos::scan;
353 using std::endl;
354 using GO = global_ordinal_type;
355 using GST = global_size_t;
356 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
357 const char funcName[] =
358 "Map(gblNumInds,lclNumInds,indexBase,comm)";
359 const char exPfx[] =
360 "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
361 const char suffix[] =
362 ". Please report this bug to the Tpetra developers.";
363
364 const bool debug = Details::Behavior::debug("Map");
365 const bool verbose = Details::Behavior::verbose("Map");
366 std::unique_ptr<std::string> prefix;
367 if (verbose) {
368 prefix = Details::createPrefix(
369 comm_.getRawPtr(), "Map", funcName);
370 std::ostringstream os;
371 os << *prefix << "Start" << endl;
372 std::cerr << os.str();
373 }
376
377 // Global sum of numLocalElements over all processes.
378 // Keep this for later debug checks.
379 GST debugGlobalSum {};
380 if (debug) {
381 debugGlobalSum = initialNonuniformDebugCheck(exPfx,
382 numGlobalElements, numLocalElements, indexBase, comm);
383 }
384
385 // Distribute the elements across the nodes so that they are
386 // - non-overlapping
387 // - contiguous
388
389 // This differs from the first Map constructor (that only takes a
390 // global number of elements) in that the user has specified the
391 // number of local elements, so that the elements are not
392 // (necessarily) evenly distributed over the processes.
393
394 // Compute my local offset. This is an inclusive scan, so to get
395 // the final offset, we subtract off the input.
396 GO scanResult = 0;
397 scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
398 const GO myOffset = scanResult - numLocalElements;
399
400 if (numGlobalElements != GSTI) {
401 numGlobalElements_ = numGlobalElements; // Use the user's value.
402 }
403 else {
404 // Inclusive scan means that the last process has the final sum.
405 // Rather than doing a reduceAll to get the sum of
406 // numLocalElements, we can just have the last process broadcast
407 // its result. That saves us a round of log(numProcs) messages.
408 const int numProcs = comm->getSize ();
409 GST globalSum = scanResult;
410 if (numProcs > 1) {
411 broadcast (*comm, numProcs - 1, outArg (globalSum));
412 }
413 numGlobalElements_ = globalSum;
414
415 if (debug) {
416 // No need for an all-reduce here; both come from collectives.
417 TEUCHOS_TEST_FOR_EXCEPTION
418 (globalSum != debugGlobalSum, std::logic_error, exPfx <<
419 "globalSum = " << globalSum << " != debugGlobalSum = " <<
420 debugGlobalSum << suffix);
421 }
422 }
423 numLocalElements_ = numLocalElements;
424 indexBase_ = indexBase;
425 minAllGID_ = (numGlobalElements_ == 0) ?
426 std::numeric_limits<GO>::max () :
427 indexBase;
428 maxAllGID_ = (numGlobalElements_ == 0) ?
429 std::numeric_limits<GO>::lowest () :
430 indexBase + GO(numGlobalElements_) - GO(1);
431 minMyGID_ = (numLocalElements_ == 0) ?
432 std::numeric_limits<GO>::max () :
433 indexBase + GO(myOffset);
434 maxMyGID_ = (numLocalElements_ == 0) ?
435 std::numeric_limits<GO>::lowest () :
436 indexBase + myOffset + GO(numLocalElements) - GO(1);
437 firstContiguousGID_ = minMyGID_;
438 lastContiguousGID_ = maxMyGID_;
439 contiguous_ = true;
440 distributed_ = checkIsDist ();
441
442 // Create the Directory on demand in getRemoteIndexList().
443 //setupDirectory ();
444
445 if (verbose) {
446 std::ostringstream os;
447 os << *prefix << "Done" << endl;
448 std::cerr << os.str();
449 }
450 }
451
452 template <class LocalOrdinal, class GlobalOrdinal, class Node>
456 const char errorMessagePrefix[],
457 const global_size_t numGlobalElements,
458 const size_t numLocalElements,
459 const global_ordinal_type indexBase,
460 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
461 {
462 const bool debug = Details::Behavior::debug("Map");
463 if (! debug) {
464 return global_size_t(0);
465 }
466
467 using Teuchos::broadcast;
468 using Teuchos::outArg;
469 using Teuchos::ptr;
470 using Teuchos::REDUCE_MAX;
471 using Teuchos::REDUCE_MIN;
472 using Teuchos::REDUCE_SUM;
473 using Teuchos::reduceAll;
474 using GO = global_ordinal_type;
475 using GST = global_size_t;
476 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
477
478 // The user has specified the distribution of indices over the
479 // processes. The distribution is not necessarily contiguous or
480 // equally shared over the processes.
481 //
482 // We assume that the number of local elements can be stored in a
483 // size_t. The instance member numLocalElements_ is a size_t, so
484 // this variable and that should have the same type.
485
486 GST debugGlobalSum = 0; // Will be global sum of numLocalElements
487 reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
488 outArg (debugGlobalSum));
489 // In debug mode only, check whether numGlobalElements and
490 // indexBase are the same over all processes in the communicator.
491 {
492 GST proc0NumGlobalElements = numGlobalElements;
493 broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
494 GST minNumGlobalElements = numGlobalElements;
495 GST maxNumGlobalElements = numGlobalElements;
496 reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
497 outArg (minNumGlobalElements));
498 reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
499 outArg (maxNumGlobalElements));
500 TEUCHOS_TEST_FOR_EXCEPTION
501 (minNumGlobalElements != maxNumGlobalElements ||
502 numGlobalElements != minNumGlobalElements,
503 std::invalid_argument, errorMessagePrefix << "All processes "
504 "must provide the same number of global elements, even if "
505 "that argument is "
506 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
507 "(which signals that the Map should compute the global "
508 "number of elements). Process 0 set numGlobalElements"
509 "=" << proc0NumGlobalElements << ". The calling process "
510 << comm->getRank() << " set numGlobalElements=" <<
511 numGlobalElements << ". The min and max values over all "
512 "processes are " << minNumGlobalElements << " resp. " <<
513 maxNumGlobalElements << ".");
514
515 GO proc0IndexBase = indexBase;
516 broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
517 GO minIndexBase = indexBase;
518 GO maxIndexBase = indexBase;
519 reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
520 reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
521 TEUCHOS_TEST_FOR_EXCEPTION
522 (minIndexBase != maxIndexBase || indexBase != minIndexBase,
523 std::invalid_argument, errorMessagePrefix <<
524 "All processes must provide the same indexBase argument. "
525 "Process 0 set indexBase = " << proc0IndexBase << ". The "
526 "calling process " << comm->getRank() << " set indexBase="
527 << indexBase << ". The min and max values over all "
528 "processes are " << minIndexBase << " resp. " << maxIndexBase
529 << ".");
530
531 // Make sure that the sum of numLocalElements over all processes
532 // equals numGlobalElements.
533 TEUCHOS_TEST_FOR_EXCEPTION
534 (numGlobalElements != GSTI &&
535 debugGlobalSum != numGlobalElements, std::invalid_argument,
536 errorMessagePrefix << "The sum of each process' number of "
537 "indices over all processes, " << debugGlobalSum << ", != "
538 << "numGlobalElements=" << numGlobalElements << ". If you "
539 "would like this constructor to compute numGlobalElements "
540 "for you, you may set numGlobalElements="
541 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
542 "on input. Please note that this is NOT necessarily -1.");
543 }
544 return debugGlobalSum;
545 }
546
547 template <class LocalOrdinal, class GlobalOrdinal, class Node>
548 void
551 const char errorMessagePrefix[],
552 const global_size_t numGlobalElements,
553 const Kokkos::View<const global_ordinal_type*,
554 Kokkos::LayoutLeft,
555 Kokkos::HostSpace,
556 Kokkos::MemoryUnmanaged>& entryList_host,
557 const global_ordinal_type indexBase,
558 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
559 {
560 Tpetra::Details::ProfilingRegion pr("Map::initWithNonownedHostIndexList()");
561
562 using Kokkos::LayoutLeft;
563 using Kokkos::subview;
564 using Kokkos::View;
565 using Kokkos::view_alloc;
566 using Kokkos::WithoutInitializing;
567 using Teuchos::as;
568 using Teuchos::broadcast;
569 using Teuchos::outArg;
570 using Teuchos::ptr;
571 using Teuchos::REDUCE_MAX;
572 using Teuchos::REDUCE_MIN;
573 using Teuchos::REDUCE_SUM;
574 using Teuchos::reduceAll;
575 using LO = local_ordinal_type;
576 using GO = global_ordinal_type;
577 using GST = global_size_t;
578 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
579 // Make sure that Kokkos has been initialized (Github Issue #513).
580 TEUCHOS_TEST_FOR_EXCEPTION
581 (! Kokkos::is_initialized (), std::runtime_error,
582 errorMessagePrefix << "The Kokkos execution space "
583 << Teuchos::TypeNameTraits<execution_space>::name()
584 << " has not been initialized. "
585 "Please initialize it before creating a Map.")
586
587 // The user has specified the distribution of indices over the
588 // processes, via the input array of global indices on each
589 // process. The distribution is not necessarily contiguous or
590 // equally shared over the processes.
591
592 // The length of the input array on this process is the number of
593 // local indices to associate with this process, even though the
594 // input array contains global indices. We assume that the number
595 // of local indices on a process can be stored in a size_t;
596 // numLocalElements_ is a size_t, so this variable and that should
597 // have the same type.
598 const size_t numLocalElements(entryList_host.size());
599
600 initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
601 numLocalElements, indexBase, comm);
602
603 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
604 // reduction is redundant, since the directory Map will have to do
605 // the same thing. Thus, we could do the scan and broadcast for
606 // the directory Map here, and give the computed offsets to the
607 // directory Map's constructor. However, a reduction costs less
608 // than a scan and broadcast, so this still saves time if users of
609 // this Map don't ever need the Directory (i.e., if they never
610 // call getRemoteIndexList on this Map).
611 if (numGlobalElements != GSTI) {
612 numGlobalElements_ = numGlobalElements; // Use the user's value.
613 }
614 else { // The user wants us to compute the sum.
615 reduceAll(*comm, REDUCE_SUM,
616 static_cast<GST>(numLocalElements),
617 outArg(numGlobalElements_));
618 }
619
620 // mfh 20 Feb 2013: We've never quite done the right thing for
621 // duplicate GIDs here. Duplicate GIDs have always been counted
622 // distinctly in numLocalElements_, and thus should get a
623 // different LID. However, we've always used std::map or a hash
624 // table for the GID -> LID lookup table, so distinct GIDs always
625 // map to the same LID. Furthermore, the order of the input GID
626 // list matters, so it's not desirable to sort for determining
627 // uniqueness.
628 //
629 // I've chosen for now to write this code as if the input GID list
630 // contains no duplicates. If this is not desired, we could use
631 // the lookup table itself to determine uniqueness: If we haven't
632 // seen the GID before, it gets a new LID and it's added to the
633 // LID -> GID and GID -> LID tables. If we have seen the GID
634 // before, it doesn't get added to either table. I would
635 // implement this, but it would cost more to do the double lookups
636 // in the table (one to check, and one to insert).
637 //
638 // More importantly, since we build the GID -> LID table in (a
639 // thread-) parallel (way), the order in which duplicate GIDs may
640 // get inserted is not defined. This would make the assignment of
641 // LID to GID nondeterministic.
642
643 numLocalElements_ = numLocalElements;
644 indexBase_ = indexBase;
645
646 minMyGID_ = indexBase_;
647 maxMyGID_ = indexBase_;
648
649 // NOTE (mfh 27 May 2015): While finding the initial contiguous
650 // GID range requires looking at all the GIDs in the range,
651 // dismissing an interval of GIDs only requires looking at the
652 // first and last GIDs. Thus, we could do binary search backwards
653 // from the end in order to catch the common case of a contiguous
654 // interval followed by noncontiguous entries. On the other hand,
655 // we could just expose this case explicitly as yet another Map
656 // constructor, and avoid the trouble of detecting it.
657 if (numLocalElements_ > 0) {
658 // Find contiguous GID range, with the restriction that the
659 // beginning of the range starts with the first entry. While
660 // doing so, fill in the LID -> GID table.
661 typename decltype (lgMap_)::non_const_type lgMap
662 (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
663 auto lgMap_host =
664 Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
665
666 // The input array entryList_host is already on host, so we
667 // don't need to take a host view of it.
668 // auto entryList_host =
669 // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
670 // Kokkos::deep_copy (entryList_host, entryList);
671
672 firstContiguousGID_ = entryList_host[0];
673 lastContiguousGID_ = firstContiguousGID_+1;
674
675 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
676 // anyway, so we have to look at them all. The logical way to
677 // find the first noncontiguous entry would thus be to "reduce,"
678 // where the local reduction result is whether entryList[i] + 1
679 // == entryList[i+1].
680
681 lgMap_host[0] = firstContiguousGID_;
682 size_t i = 1;
683 for ( ; i < numLocalElements_; ++i) {
684 const GO curGid = entryList_host[i];
685 const LO curLid = as<LO> (i);
686
687 if (lastContiguousGID_ != curGid) break;
688
689 // Add the entry to the LID->GID table only after we know that
690 // the current GID is in the initial contiguous sequence, so
691 // that we don't repeat adding it in the first iteration of
692 // the loop below over the remaining noncontiguous GIDs.
693 lgMap_host[curLid] = curGid;
694 ++lastContiguousGID_;
695 }
696 --lastContiguousGID_;
697 // NOTE: i is the first non-contiguous index.
698
699 // [firstContiguousGID_, lastContigousGID_] is the initial
700 // sequence of contiguous GIDs. We can start the min and max
701 // GID using this range.
702 minMyGID_ = firstContiguousGID_;
703 maxMyGID_ = lastContiguousGID_;
704
705 // Compute the GID -> LID lookup table, _not_ including the
706 // initial sequence of contiguous GIDs.
707 LO firstNonContiguous_loc=i;
708 {
709
710 const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
711 auto nonContigGids_host = subview (entryList_host, ncRange);
712 TEUCHOS_TEST_FOR_EXCEPTION
713 (static_cast<size_t> (nonContigGids_host.extent (0)) !=
714 static_cast<size_t> (entryList_host.extent (0) - i),
715 std::logic_error, "Tpetra::Map noncontiguous constructor: "
716 "nonContigGids_host.extent(0) = "
717 << nonContigGids_host.extent (0)
718 << " != entryList_host.extent(0) - i = "
719 << (entryList_host.extent (0) - i) << " = "
720 << entryList_host.extent (0) << " - " << i
721 << ". Please report this bug to the Tpetra developers.");
722
723 // FixedHashTable's constructor expects an owned device View,
724 // so we must deep-copy the subview of the input indices.
725 View<GO*, LayoutLeft, device_type>
726 nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
727 nonContigGids_host.size ());
728
729
730 // DEEP_COPY REVIEW - HOST-TO-DEVICE
731 Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
732 Kokkos::fence("Map::initWithNonownedHostIndexList"); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
733
734 glMap_ = global_to_local_table_type(nonContigGids,
735 firstNonContiguous_loc);
736 // Make host version - when memory spaces match these just do trivial assignment
737 glMapHost_ = global_to_local_table_host_type(glMap_);
738 }
739
740 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
741 // table above, we have to look at all the (noncontiguous) input
742 // indices anyway. Thus, why not have the constructor compute
743 // and return the min and max?
744
745 for ( ; i < numLocalElements_; ++i) {
746 const GO curGid = entryList_host[i];
747 const LO curLid = static_cast<LO> (i);
748 lgMap_host[curLid] = curGid; // LID -> GID table
749
750 // While iterating through entryList, we compute its
751 // (process-local) min and max elements.
752 if (curGid < minMyGID_) {
753 minMyGID_ = curGid;
754 }
755 if (curGid > maxMyGID_) {
756 maxMyGID_ = curGid;
757 }
758 }
759
760 // We filled lgMap on host above; now sync back to device.
761 // DEEP_COPY REVIEW - HOST-TO-DEVICE
762 Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
763
764 // "Commit" the local-to-global lookup table we filled in above.
765 lgMap_ = lgMap;
766 // We've already created this, so use it.
767 lgMapHost_ = lgMap_host;
768
769
770 }
771 else {
772 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
773 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
774 // This insures tests for GIDs in the range
775 // [firstContiguousGID_, lastContiguousGID_] fail for processes
776 // with no local elements.
777 firstContiguousGID_ = indexBase_+1;
778 lastContiguousGID_ = indexBase_;
779 // glMap_ was default constructed, so it's already empty.
780
781 }
782
783
784
785
786 // Compute the min and max of all processes' GIDs. If
787 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
788 // are both indexBase_. This is wrong, but fixing it would
789 // require either a fancy sparse all-reduce, or a custom reduction
790 // operator that ignores invalid values ("invalid" means
791 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
792 //
793 // Also, while we're at it, use the same all-reduce to figure out
794 // if the Map is distributed. "Distributed" means that there is
795 // at least one process with a number of local elements less than
796 // the number of global elements.
797 //
798 // We're computing the min and max of all processes' GIDs using a
799 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
800 // and y are signed). (This lets us combine the min and max into
801 // a single all-reduce.) If each process sets localDist=1 if its
802 // number of local elements is strictly less than the number of
803 // global elements, and localDist=0 otherwise, then a MAX
804 // all-reduce on localDist tells us if the Map is distributed (1
805 // if yes, 0 if no). Thus, we can append localDist onto the end
806 // of the data and get the global result from the all-reduce.
807 if (std::numeric_limits<GO>::is_signed) {
808 // Does my process NOT own all the elements?
809 const GO localDist =
810 (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
811
812 GO minMaxInput[3];
813 minMaxInput[0] = -minMyGID_;
814 minMaxInput[1] = maxMyGID_;
815 minMaxInput[2] = localDist;
816
817 GO minMaxOutput[3];
818 minMaxOutput[0] = 0;
819 minMaxOutput[1] = 0;
820 minMaxOutput[2] = 0;
821 reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
822 minAllGID_ = -minMaxOutput[0];
823 maxAllGID_ = minMaxOutput[1];
824 const GO globalDist = minMaxOutput[2];
825 distributed_ = (comm_->getSize () > 1 && globalDist == 1);
826 }
827 else { // unsigned; use two reductions
828 // This is always correct, no matter the signedness of GO.
829 reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
830 reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
831 distributed_ = checkIsDist ();
832 }
833
834 contiguous_ = false; // "Contiguous" is conservative.
835
836 TEUCHOS_TEST_FOR_EXCEPTION(
837 minAllGID_ < indexBase_,
838 std::invalid_argument,
839 "Tpetra::Map constructor (noncontiguous): "
840 "Minimum global ID = " << minAllGID_ << " over all process(es) is "
841 "less than the given indexBase = " << indexBase_ << ".");
842
843 // Create the Directory on demand in getRemoteIndexList().
844 //setupDirectory ();
845 }
846
847 template <class LocalOrdinal, class GlobalOrdinal, class Node>
849 Map (const global_size_t numGlobalElements,
850 const GlobalOrdinal indexList[],
851 const LocalOrdinal indexListSize,
852 const GlobalOrdinal indexBase,
853 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
854 comm_ (comm),
855 uniform_ (false),
856 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
857 {
858 using std::endl;
859 const char funcName[] =
860 "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
861
862 const bool verbose = Details::Behavior::verbose("Map");
863 std::unique_ptr<std::string> prefix;
864 if (verbose) {
865 prefix = Details::createPrefix(
866 comm_.getRawPtr(), "Map", funcName);
867 std::ostringstream os;
868 os << *prefix << "Start" << endl;
869 std::cerr << os.str();
870 }
874 checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
875 indexList, static_cast<size_t> (indexListSize),
876 comm.getRawPtr ());
877 // Not quite sure if I trust all code to behave correctly if the
878 // pointer is nonnull but the array length is nonzero, so I'll
879 // make sure the raw pointer is null if the length is zero.
880 const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
881 Kokkos::View<const GlobalOrdinal*,
882 Kokkos::LayoutLeft,
883 Kokkos::HostSpace,
884 Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
885 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
886 indexBase, comm);
887 if (verbose) {
888 std::ostringstream os;
889 os << *prefix << "Done" << endl;
890 std::cerr << os.str();
891 }
892 }
893
894 template <class LocalOrdinal, class GlobalOrdinal, class Node>
896 Map (const global_size_t numGlobalElements,
897 const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
898 const GlobalOrdinal indexBase,
899 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
900 comm_ (comm),
901 uniform_ (false),
902 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
903 {
904 using std::endl;
905 const char* funcName = "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
906
907 const bool verbose = Details::Behavior::verbose("Map");
908 std::unique_ptr<std::string> prefix;
909 if (verbose) {
910 prefix = Details::createPrefix(
911 comm_.getRawPtr(), "Map", funcName);
912 std::ostringstream os;
913 os << *prefix << "Start" << endl;
914 std::cerr << os.str();
915 }
919 const size_t numLclInds = static_cast<size_t> (entryList.size ());
920 checkMapInputArray ("(GST, ArrayView, GO, comm)",
921 entryList.getRawPtr (), numLclInds,
922 comm.getRawPtr ());
923 // Not quite sure if I trust both ArrayView and View to behave
924 // correctly if the pointer is nonnull but the array length is
925 // nonzero, so I'll make sure it's null if the length is zero.
926 const GlobalOrdinal* const indsRaw =
927 numLclInds == 0 ? NULL : entryList.getRawPtr ();
928 Kokkos::View<const GlobalOrdinal*,
929 Kokkos::LayoutLeft,
930 Kokkos::HostSpace,
931 Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
932 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
933 indexBase, comm);
934 if (verbose) {
935 std::ostringstream os;
936 os << *prefix << "Done" << endl;
937 std::cerr << os.str();
938 }
939 }
940
941
942 template <class LocalOrdinal, class GlobalOrdinal, class Node>
944 Map (const global_size_t numGlobalElements,
945 const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
946 const GlobalOrdinal indexBase,
947 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
948 comm_ (comm),
949 uniform_ (false),
950 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
951 {
952 using Kokkos::LayoutLeft;
953 using Kokkos::subview;
954 using Kokkos::View;
955 using Kokkos::view_alloc;
956 using Kokkos::WithoutInitializing;
957 using Teuchos::arcp;
958 using Teuchos::ArrayView;
959 using Teuchos::as;
960 using Teuchos::broadcast;
961 using Teuchos::outArg;
962 using Teuchos::ptr;
963 using Teuchos::REDUCE_MAX;
964 using Teuchos::REDUCE_MIN;
965 using Teuchos::REDUCE_SUM;
966 using Teuchos::reduceAll;
967 using Teuchos::typeName;
968 using std::endl;
969 using LO = local_ordinal_type;
970 using GO = global_ordinal_type;
971 using GST = global_size_t;
972 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
973 const char funcName[] =
974 "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
975
976 const bool verbose = Details::Behavior::verbose("Map");
977 std::unique_ptr<std::string> prefix;
978 if (verbose) {
979 prefix = Details::createPrefix(
980 comm_.getRawPtr(), "Map", funcName);
981 std::ostringstream os;
982 os << *prefix << "Start" << endl;
983 std::cerr << os.str();
984 }
988 checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
989 entryList.data (),
990 static_cast<size_t> (entryList.extent (0)),
991 comm.getRawPtr ());
992
993 // The user has specified the distribution of indices over the
994 // processes, via the input array of global indices on each
995 // process. The distribution is not necessarily contiguous or
996 // equally shared over the processes.
997
998 // The length of the input array on this process is the number of
999 // local indices to associate with this process, even though the
1000 // input array contains global indices. We assume that the number
1001 // of local indices on a process can be stored in a size_t;
1002 // numLocalElements_ is a size_t, so this variable and that should
1003 // have the same type.
1004 const size_t numLocalElements(entryList.size());
1005
1006 initialNonuniformDebugCheck(funcName, numGlobalElements,
1007 numLocalElements, indexBase, comm);
1008
1009 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
1010 // reduction is redundant, since the directory Map will have to do
1011 // the same thing. Thus, we could do the scan and broadcast for
1012 // the directory Map here, and give the computed offsets to the
1013 // directory Map's constructor. However, a reduction costs less
1014 // than a scan and broadcast, so this still saves time if users of
1015 // this Map don't ever need the Directory (i.e., if they never
1016 // call getRemoteIndexList on this Map).
1017 if (numGlobalElements != GSTI) {
1018 numGlobalElements_ = numGlobalElements; // Use the user's value.
1019 }
1020 else { // The user wants us to compute the sum.
1021 reduceAll(*comm, REDUCE_SUM,
1022 static_cast<GST>(numLocalElements),
1023 outArg(numGlobalElements_));
1024 }
1025
1026
1027 // mfh 20 Feb 2013: We've never quite done the right thing for
1028 // duplicate GIDs here. Duplicate GIDs have always been counted
1029 // distinctly in numLocalElements_, and thus should get a
1030 // different LID. However, we've always used std::map or a hash
1031 // table for the GID -> LID lookup table, so distinct GIDs always
1032 // map to the same LID. Furthermore, the order of the input GID
1033 // list matters, so it's not desirable to sort for determining
1034 // uniqueness.
1035 //
1036 // I've chosen for now to write this code as if the input GID list
1037 // contains no duplicates. If this is not desired, we could use
1038 // the lookup table itself to determine uniqueness: If we haven't
1039 // seen the GID before, it gets a new LID and it's added to the
1040 // LID -> GID and GID -> LID tables. If we have seen the GID
1041 // before, it doesn't get added to either table. I would
1042 // implement this, but it would cost more to do the double lookups
1043 // in the table (one to check, and one to insert).
1044 //
1045 // More importantly, since we build the GID -> LID table in (a
1046 // thread-) parallel (way), the order in which duplicate GIDs may
1047 // get inserted is not defined. This would make the assignment of
1048 // LID to GID nondeterministic.
1049
1050 numLocalElements_ = numLocalElements;
1051 indexBase_ = indexBase;
1052
1053 minMyGID_ = indexBase_;
1054 maxMyGID_ = indexBase_;
1055
1056 // NOTE (mfh 27 May 2015): While finding the initial contiguous
1057 // GID range requires looking at all the GIDs in the range,
1058 // dismissing an interval of GIDs only requires looking at the
1059 // first and last GIDs. Thus, we could do binary search backwards
1060 // from the end in order to catch the common case of a contiguous
1061 // interval followed by noncontiguous entries. On the other hand,
1062 // we could just expose this case explicitly as yet another Map
1063 // constructor, and avoid the trouble of detecting it.
1064 if (numLocalElements_ > 0) {
1065 // Find contiguous GID range, with the restriction that the
1066 // beginning of the range starts with the first entry. While
1067 // doing so, fill in the LID -> GID table.
1068 typename decltype (lgMap_)::non_const_type lgMap
1069 (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1070
1071 // Because you can't use lambdas in constructors on CUDA. Or using private/protected data.
1072 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
1073 Kokkos::deep_copy(typename device_type::execution_space(),lgMap,entryList);
1074 LO lastContiguousGID_loc;
1075 computeConstantsOnDevice(entryList,minMyGID_,maxMyGID_,firstContiguousGID_,lastContiguousGID_,lastContiguousGID_loc);
1076 LO firstNonContiguous_loc = lastContiguousGID_loc+1;
1077 auto nonContigGids = Kokkos::subview(entryList,std::pair<size_t,size_t>(firstNonContiguous_loc,entryList.extent(0)));
1078
1079 // NOTE: We do not fill the glMapHost_ and lgMapHost_ views here. They will be filled lazily later
1080 glMap_ = global_to_local_table_type(nonContigGids,
1081 firstNonContiguous_loc);
1082
1083 // "Commit" the local-to-global lookup table we filled in above.
1084 lgMap_ = lgMap;
1085
1086 }
1087 else {
1088 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1089 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1090 // This insures tests for GIDs in the range
1091 // [firstContiguousGID_, lastContiguousGID_] fail for processes
1092 // with no local elements.
1093 firstContiguousGID_ = indexBase_+1;
1094 lastContiguousGID_ = indexBase_;
1095 // glMap_ was default constructed, so it's already empty.
1096 }
1097
1098
1099 // Compute the min and max of all processes' GIDs. If
1100 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1101 // are both indexBase_. This is wrong, but fixing it would
1102 // require either a fancy sparse all-reduce, or a custom reduction
1103 // operator that ignores invalid values ("invalid" means
1104 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1105 //
1106 // Also, while we're at it, use the same all-reduce to figure out
1107 // if the Map is distributed. "Distributed" means that there is
1108 // at least one process with a number of local elements less than
1109 // the number of global elements.
1110 //
1111 // We're computing the min and max of all processes' GIDs using a
1112 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1113 // and y are signed). (This lets us combine the min and max into
1114 // a single all-reduce.) If each process sets localDist=1 if its
1115 // number of local elements is strictly less than the number of
1116 // global elements, and localDist=0 otherwise, then a MAX
1117 // all-reduce on localDist tells us if the Map is distributed (1
1118 // if yes, 0 if no). Thus, we can append localDist onto the end
1119 // of the data and get the global result from the all-reduce.
1120 if (std::numeric_limits<GO>::is_signed) {
1121 // Does my process NOT own all the elements?
1122 const GO localDist =
1123 (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1124
1125 GO minMaxInput[3];
1126 minMaxInput[0] = -minMyGID_;
1127 minMaxInput[1] = maxMyGID_;
1128 minMaxInput[2] = localDist;
1129
1130 GO minMaxOutput[3];
1131 minMaxOutput[0] = 0;
1132 minMaxOutput[1] = 0;
1133 minMaxOutput[2] = 0;
1134 reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1135 minAllGID_ = -minMaxOutput[0];
1136 maxAllGID_ = minMaxOutput[1];
1137 const GO globalDist = minMaxOutput[2];
1138 distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1139 }
1140 else { // unsigned; use two reductions
1141 // This is always correct, no matter the signedness of GO.
1142 reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1143 reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1144 distributed_ = checkIsDist ();
1145 }
1146
1147 contiguous_ = false; // "Contiguous" is conservative.
1148
1149 TEUCHOS_TEST_FOR_EXCEPTION(
1150 minAllGID_ < indexBase_,
1151 std::invalid_argument,
1152 "Tpetra::Map constructor (noncontiguous): "
1153 "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1154 "less than the given indexBase = " << indexBase_ << ".");
1155
1156 // Create the Directory on demand in getRemoteIndexList().
1157 //setupDirectory ();
1158
1159 if (verbose) {
1160 std::ostringstream os;
1161 os << *prefix << "Done" << endl;
1162 std::cerr << os.str();
1163 }
1164 }
1165
1166
1167 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1169 {
1170 if (! Kokkos::is_initialized ()) {
1171 std::ostringstream os;
1172 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1173 "Kokkos::finalize() has been called. This is user error! There are "
1174 "two likely causes: " << std::endl <<
1175 " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1176 << std::endl <<
1177 " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1178 "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1179 "or Tpetra::finalize()." << std::endl << std::endl <<
1180 "Don't do either of these! Please refer to GitHib Issue #2372."
1181 << std::endl;
1182 ::Tpetra::Details::printOnce (std::cerr, os.str (),
1183 this->getComm ().getRawPtr ());
1184 }
1185 else {
1189
1190 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1191 if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1192 mpiIsInitialized () && mpiIsFinalized ()) {
1193 // Tpetra itself does not require MPI, even if building with
1194 // MPI. It is legal to create Tpetra objects that do not use
1195 // MPI, even in an MPI program. However, calling Tpetra stuff
1196 // after MPI_Finalize() has been called is a bad idea, since
1197 // some Tpetra defaults may use MPI if available.
1198 std::ostringstream os;
1199 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1200 "MPI_Finalize() has been called. This is user error! There are "
1201 "two likely causes: " << std::endl <<
1202 " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1203 << std::endl <<
1204 " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1205 "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1206 "Tpetra::finalize()." << std::endl << std::endl <<
1207 "Don't do either of these! Please refer to GitHib Issue #2372."
1208 << std::endl;
1209 ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1210 }
1211 }
1212 // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1213 // because Tpetra does not yet require Tpetra::initialize /
1214 // Tpetra::finalize.
1215 }
1216
1217 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1218 bool
1220 {
1221 TEUCHOS_TEST_FOR_EXCEPTION(
1222 getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1223 "getComm() returns null. Please report this bug to the Tpetra "
1224 "developers.");
1225
1226 // This is a collective operation, if it hasn't been called before.
1227 setupDirectory ();
1228 return directory_->isOneToOne (*this);
1229 }
1230
1231 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1232 LocalOrdinal
1234 getLocalElement (GlobalOrdinal globalIndex) const
1235 {
1236 if (isContiguous ()) {
1237 if (globalIndex < getMinGlobalIndex () ||
1238 globalIndex > getMaxGlobalIndex ()) {
1239 return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1240 }
1241 return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1242 }
1243 else if (globalIndex >= firstContiguousGID_ &&
1244 globalIndex <= lastContiguousGID_) {
1245 return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1246 }
1247 else {
1248 // If the given global index is not in the table, this returns
1249 // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1250 // glMapHost_ is Host and does not assume UVM
1251 lazyPushToHost();
1252 return glMapHost_.get (globalIndex);
1253 }
1254 }
1255
1256 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1257 GlobalOrdinal
1259 getGlobalElement (LocalOrdinal localIndex) const
1260 {
1261 if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1262 return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1263 }
1264 if (isContiguous ()) {
1265 return getMinGlobalIndex () + localIndex;
1266 }
1267 else {
1268 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1269 // involvement. As a result, it is thread safe.
1270 //
1271 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1272 lazyPushToHost();
1273 return lgMapHost_[localIndex];
1274 }
1275 }
1276
1277 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1278 bool
1280 isNodeLocalElement (LocalOrdinal localIndex) const
1281 {
1282 if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1283 return false;
1284 } else {
1285 return true;
1286 }
1287 }
1288
1289 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1290 bool
1292 isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1293 return this->getLocalElement (globalIndex) !=
1294 Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1295 }
1296
1297 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1299 return uniform_;
1300 }
1301
1302 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1304 return contiguous_;
1305 }
1306
1307
1308 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1311 getLocalMap () const
1312 {
1313 return local_map_type (glMap_, lgMap_, getIndexBase (),
1315 firstContiguousGID_, lastContiguousGID_,
1317 }
1318
1319 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320 bool
1323 {
1324 using Teuchos::outArg;
1325 using Teuchos::REDUCE_MIN;
1326 using Teuchos::reduceAll;
1327 //
1328 // Tests that avoid the Boolean all-reduce below by using
1329 // globally consistent quantities.
1330 //
1331 if (this == &map) {
1332 // Pointer equality on one process always implies pointer
1333 // equality on all processes, since Map is immutable.
1334 return true;
1335 }
1336 else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1337 // The two communicators have different numbers of processes.
1338 // It's not correct to call isCompatible() in that case. This
1339 // may result in the all-reduce hanging below.
1340 return false;
1341 }
1342 else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1343 // Two Maps are definitely NOT compatible if they have different
1344 // global numbers of indices.
1345 return false;
1346 }
1347 else if (isContiguous () && isUniform () &&
1348 map.isContiguous () && map.isUniform ()) {
1349 // Contiguous uniform Maps with the same number of processes in
1350 // their communicators, and with the same global numbers of
1351 // indices, are always compatible.
1352 return true;
1353 }
1354 else if (! isContiguous () && ! map.isContiguous () &&
1355 lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1356 lgMap_.data () == map.lgMap_.data ()) {
1357 // Noncontiguous Maps whose global index lists are nonempty and
1358 // have the same pointer must be the same (and therefore
1359 // contiguous).
1360 //
1361 // Nonempty is important. For example, consider a communicator
1362 // with two processes, and two Maps that share this
1363 // communicator, with zero global indices on the first process,
1364 // and different nonzero numbers of global indices on the second
1365 // process. In that case, on the first process, the pointers
1366 // would both be NULL.
1367 return true;
1368 }
1369
1370 TEUCHOS_TEST_FOR_EXCEPTION(
1371 getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1372 "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1373 "checked that this condition is true above, but it's false here. "
1374 "Please report this bug to the Tpetra developers.");
1375
1376 // Do both Maps have the same number of indices on each process?
1377 const int locallyCompat =
1378 (getLocalNumElements () == map.getLocalNumElements ()) ? 1 : 0;
1379
1380 int globallyCompat = 0;
1381 reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1382 return (globallyCompat == 1);
1383 }
1384
1385 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1386 bool
1389 {
1390 using Teuchos::ArrayView;
1391 using GO = global_ordinal_type;
1392 using size_type = typename ArrayView<const GO>::size_type;
1393
1394 // If both Maps are contiguous, we can compare their GID ranges
1395 // easily by looking at the min and max GID on this process.
1396 // Otherwise, we'll compare their GID lists. If only one Map is
1397 // contiguous, then we only have to call getLocalElementList() on
1398 // the noncontiguous Map. (It's best to avoid calling it on a
1399 // contiguous Map, since it results in unnecessary storage that
1400 // persists for the lifetime of the Map.)
1401
1402 if (this == &map) {
1403 // Pointer equality on one process always implies pointer
1404 // equality on all processes, since Map is immutable.
1405 return true;
1406 }
1407 else if (getLocalNumElements () != map.getLocalNumElements ()) {
1408 return false;
1409 }
1410 else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1411 getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1412 return false;
1413 }
1414 else {
1415 if (isContiguous ()) {
1416 if (map.isContiguous ()) {
1417 return true; // min and max match, so the ranges match.
1418 }
1419 else { // *this is contiguous, but map is not contiguous
1420 TEUCHOS_TEST_FOR_EXCEPTION(
1421 ! this->isContiguous () || map.isContiguous (), std::logic_error,
1422 "Tpetra::Map::locallySameAs: BUG");
1423 ArrayView<const GO> rhsElts = map.getLocalElementList ();
1424 const GO minLhsGid = this->getMinGlobalIndex ();
1425 const size_type numRhsElts = rhsElts.size ();
1426 for (size_type k = 0; k < numRhsElts; ++k) {
1427 const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1428 if (curLhsGid != rhsElts[k]) {
1429 return false; // stop on first mismatch
1430 }
1431 }
1432 return true;
1433 }
1434 }
1435 else if (map.isContiguous ()) { // *this is not contiguous, but map is
1436 TEUCHOS_TEST_FOR_EXCEPTION(
1437 this->isContiguous () || ! map.isContiguous (), std::logic_error,
1438 "Tpetra::Map::locallySameAs: BUG");
1439 ArrayView<const GO> lhsElts = this->getLocalElementList ();
1440 const GO minRhsGid = map.getMinGlobalIndex ();
1441 const size_type numLhsElts = lhsElts.size ();
1442 for (size_type k = 0; k < numLhsElts; ++k) {
1443 const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1444 if (curRhsGid != lhsElts[k]) {
1445 return false; // stop on first mismatch
1446 }
1447 }
1448 return true;
1449 }
1450 else if (this->lgMap_.data () == map.lgMap_.data ()) {
1451 // Pointers to LID->GID "map" (actually just an array) are the
1452 // same, and the number of GIDs are the same.
1453 return this->getLocalNumElements () == map.getLocalNumElements ();
1454 }
1455 else { // we actually have to compare the GIDs
1456 if (this->getLocalNumElements () != map.getLocalNumElements ()) {
1457 return false; // We already checked above, but check just in case
1458 }
1459 else {
1460 ArrayView<const GO> lhsElts = getLocalElementList ();
1461 ArrayView<const GO> rhsElts = map.getLocalElementList ();
1462
1463 // std::equal requires that the latter range is as large as
1464 // the former. We know the ranges have equal length, because
1465 // they have the same number of local entries.
1466 return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1467 }
1468 }
1469 }
1470 }
1471
1472 template <class LocalOrdinal,class GlobalOrdinal, class Node>
1473 bool
1476 {
1477 if (this == &map)
1478 return true;
1479
1480 // We are going to check if lmap1 is fitted into lmap2:
1481 // Is lmap1 (map) a subset of lmap2 (this)?
1482 // And do the first lmap1.getLocalNumElements() global elements
1483 // of lmap1,lmap2 owned on each process exactly match?
1484 auto lmap1 = map.getLocalMap();
1485 auto lmap2 = this->getLocalMap();
1486
1487 auto numLocalElements1 = lmap1.getLocalNumElements();
1488 auto numLocalElements2 = lmap2.getLocalNumElements();
1489
1490 if (numLocalElements1 > numLocalElements2) {
1491 // There are more indices in the first map on this process than in second map.
1492 return false;
1493 }
1494
1495 if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1496 // When both Maps are contiguous, just check the interval inclusion.
1497 return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1498 (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1499 }
1500
1501 if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1502 lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1503 // The second map does not include the first map bounds, and thus some of
1504 // the first map global indices are not in the second map.
1505 return false;
1506 }
1507
1508 using LO = local_ordinal_type;
1509 using range_type =
1510 Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1511
1512 // Check all elements.
1513 LO numDiff = 0;
1514 Kokkos::parallel_reduce(
1515 "isLocallyFitted",
1516 range_type(0, numLocalElements1),
1517 KOKKOS_LAMBDA (const LO i, LO& diff) {
1518 diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1519 }, numDiff);
1520
1521 return (numDiff == 0);
1522 }
1523
1524 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1525 bool
1528 {
1529 using Teuchos::outArg;
1530 using Teuchos::REDUCE_MIN;
1531 using Teuchos::reduceAll;
1532 //
1533 // Tests that avoid the Boolean all-reduce below by using
1534 // globally consistent quantities.
1535 //
1536 if (this == &map) {
1537 // Pointer equality on one process always implies pointer
1538 // equality on all processes, since Map is immutable.
1539 return true;
1540 }
1541 else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1542 // The two communicators have different numbers of processes.
1543 // It's not correct to call isSameAs() in that case. This
1544 // may result in the all-reduce hanging below.
1545 return false;
1546 }
1547 else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1548 // Two Maps are definitely NOT the same if they have different
1549 // global numbers of indices.
1550 return false;
1551 }
1552 else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1554 getIndexBase () != map.getIndexBase ()) {
1555 // If the global min or max global index doesn't match, or if
1556 // the index base doesn't match, then the Maps aren't the same.
1557 return false;
1558 }
1559 else if (isDistributed () != map.isDistributed ()) {
1560 // One Map is distributed and the other is not, which means that
1561 // the Maps aren't the same.
1562 return false;
1563 }
1564 else if (isContiguous () && isUniform () &&
1565 map.isContiguous () && map.isUniform ()) {
1566 // Contiguous uniform Maps with the same number of processes in
1567 // their communicators, with the same global numbers of indices,
1568 // and with matching index bases and ranges, must be the same.
1569 return true;
1570 }
1571
1572 // The two communicators must have the same number of processes,
1573 // with process ranks occurring in the same order. This uses
1574 // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1575 // "Operations that access communicators are local and their
1576 // execution does not require interprocess communication."
1577 // However, just to be sure, I'll put this call after the above
1578 // tests that don't communicate.
1579 if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1580 return false;
1581 }
1582
1583 // If we get this far, we need to check local properties and then
1584 // communicate local sameness across all processes.
1585 const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1586
1587 // Return true if and only if all processes report local sameness.
1588 int isSame_gbl = 0;
1589 reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1590 return isSame_gbl == 1;
1591 }
1592
1593 namespace { // (anonymous)
1594 template <class LO, class GO, class DT>
1595 class FillLgMap {
1596 public:
1597 FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1598 const GO startGid) :
1599 lgMap_ (lgMap), startGid_ (startGid)
1600 {
1601 Kokkos::RangePolicy<LO, typename DT::execution_space>
1602 range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1603 Kokkos::parallel_for (range, *this);
1604 }
1605
1606 KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1607 lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1608 }
1609
1610 private:
1611 const Kokkos::View<GO*, DT> lgMap_;
1612 const GO startGid_;
1613 };
1614
1615 } // namespace (anonymous)
1616
1617
1618 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1619 typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1621 {
1622 using std::endl;
1623 using LO = local_ordinal_type;
1624 using GO = global_ordinal_type;
1625 using const_lg_view_type = decltype(lgMap_);
1626 using lg_view_type = typename const_lg_view_type::non_const_type;
1627 const bool debug = Details::Behavior::debug("Map");
1628 const bool verbose = Details::Behavior::verbose("Map");
1629
1630 std::unique_ptr<std::string> prefix;
1631 if (verbose) {
1632 prefix = Details::createPrefix(
1633 comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1634 std::ostringstream os;
1635 os << *prefix << "Start" << endl;
1636 std::cerr << os.str();
1637 }
1638
1639 // If the local-to-global mapping doesn't exist yet, and if we
1640 // have local entries, then create and fill the local-to-global
1641 // mapping.
1642 const bool needToCreateLocalToGlobalMapping =
1643 lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1644
1645 if (needToCreateLocalToGlobalMapping) {
1646 if (verbose) {
1647 std::ostringstream os;
1648 os << *prefix << "Need to create lgMap" << endl;
1649 std::cerr << os.str();
1650 }
1651 if (debug) {
1652 // The local-to-global mapping should have been set up already
1653 // for a noncontiguous map.
1654 TEUCHOS_TEST_FOR_EXCEPTION
1655 (! isContiguous(), std::logic_error,
1656 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1657 "mapping (lgMap_) should have been set up already for a "
1658 "noncontiguous Map. Please report this bug to the Tpetra "
1659 "developers.");
1660 }
1661 const LO numElts = static_cast<LO> (getLocalNumElements ());
1662
1663 using Kokkos::view_alloc;
1664 using Kokkos::WithoutInitializing;
1665 lg_view_type lgMap ("lgMap", numElts);
1666 if (verbose) {
1667 std::ostringstream os;
1668 os << *prefix << "Fill lgMap" << endl;
1669 std::cerr << os.str();
1670 }
1671 FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1672
1673 if (verbose) {
1674 std::ostringstream os;
1675 os << *prefix << "Copy lgMap to lgMapHost" << endl;
1676 std::cerr << os.str();
1677 }
1678
1679 auto lgMapHost = Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1680 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1681 auto exec_instance = execution_space();
1682 Kokkos::deep_copy (exec_instance, lgMapHost, lgMap);
1683
1684 // There's a non-trivial chance we'll grab this on the host,
1685 // so let's make sure the copy finishes
1686 exec_instance.fence();
1687
1688 // "Commit" the local-to-global lookup table we filled in above.
1689 lgMap_ = lgMap;
1690 lgMapHost_ = lgMapHost;
1691 }
1692 else {
1693 lazyPushToHost();
1694 }
1695
1696 if (verbose) {
1697 std::ostringstream os;
1698 os << *prefix << "Done" << endl;
1699 std::cerr << os.str();
1700 }
1701 return lgMapHost_;
1702 }
1703
1704
1705 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1706 typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_device_type
1708 {
1709 using std::endl;
1710 using LO = local_ordinal_type;
1711 using GO = global_ordinal_type;
1712 using const_lg_view_type = decltype(lgMap_);
1713 using lg_view_type = typename const_lg_view_type::non_const_type;
1714 const bool debug = Details::Behavior::debug("Map");
1715 const bool verbose = Details::Behavior::verbose("Map");
1716
1717 std::unique_ptr<std::string> prefix;
1718 if (verbose) {
1719 prefix = Details::createPrefix(
1720 comm_.getRawPtr(), "Map", "getMyGlobalIndicesDevice");
1721 std::ostringstream os;
1722 os << *prefix << "Start" << endl;
1723 std::cerr << os.str();
1724 }
1725
1726 // If the local-to-global mapping doesn't exist yet, and if we
1727 // have local entries, then create and fill the local-to-global
1728 // mapping.
1729 const bool needToCreateLocalToGlobalMapping =
1730 lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1731
1732 if (needToCreateLocalToGlobalMapping) {
1733 if (verbose) {
1734 std::ostringstream os;
1735 os << *prefix << "Need to create lgMap" << endl;
1736 std::cerr << os.str();
1737 }
1738 if (debug) {
1739 // The local-to-global mapping should have been set up already
1740 // for a noncontiguous map.
1741 TEUCHOS_TEST_FOR_EXCEPTION
1742 (! isContiguous(), std::logic_error,
1743 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1744 "mapping (lgMap_) should have been set up already for a "
1745 "noncontiguous Map. Please report this bug to the Tpetra "
1746 "developers.");
1747 }
1748 const LO numElts = static_cast<LO> (getLocalNumElements ());
1749
1750 using Kokkos::view_alloc;
1751 using Kokkos::WithoutInitializing;
1752 lg_view_type lgMap ("lgMap", numElts);
1753 if (verbose) {
1754 std::ostringstream os;
1755 os << *prefix << "Fill lgMap" << endl;
1756 std::cerr << os.str();
1757 }
1758 FillLgMap<LO, GO, device_type> fillIt (lgMap, minMyGID_);
1759
1760 // "Commit" the local-to-global lookup table we filled in above.
1761 lgMap_ = lgMap;
1762 }
1763
1764 if (verbose) {
1765 std::ostringstream os;
1766 os << *prefix << "Done" << endl;
1767 std::cerr << os.str();
1768 }
1769 return lgMap_;
1770 }
1771
1772
1773
1774 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1775 Teuchos::ArrayView<const GlobalOrdinal>
1777 {
1778 using GO = global_ordinal_type;
1779
1780 // If the local-to-global mapping doesn't exist yet, and if we
1781 // have local entries, then create and fill the local-to-global
1782 // mapping.
1783 (void) this->getMyGlobalIndices ();
1784
1785 // This does NOT assume UVM; lgMapHost_ is a host pointer.
1786 lazyPushToHost();
1787 const GO* lgMapHostRawPtr = lgMapHost_.data ();
1788 // The third argument forces ArrayView not to try to track memory
1789 // in a debug build. We have to use it because the memory does
1790 // not belong to a Teuchos memory management class.
1791 return Teuchos::ArrayView<const GO>(
1792 lgMapHostRawPtr,
1793 lgMapHost_.extent (0),
1794 Teuchos::RCP_DISABLE_NODE_LOOKUP);
1795 }
1796
1797 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1799 return distributed_;
1800 }
1801
1802 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1804 using Teuchos::TypeNameTraits;
1805 std::ostringstream os;
1806
1807 os << "Tpetra::Map: {"
1808 << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1809 << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1810 << ", NodeType: " << TypeNameTraits<Node>::name ();
1811 if (this->getObjectLabel () != "") {
1812 os << ", Label: \"" << this->getObjectLabel () << "\"";
1813 }
1814 os << ", Global number of entries: " << getGlobalNumElements ()
1815 << ", Number of processes: " << getComm ()->getSize ()
1816 << ", Uniform: " << (isUniform () ? "true" : "false")
1817 << ", Contiguous: " << (isContiguous () ? "true" : "false")
1818 << ", Distributed: " << (isDistributed () ? "true" : "false")
1819 << "}";
1820 return os.str ();
1821 }
1822
1827 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1828 std::string
1830 localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1831 {
1832 using LO = local_ordinal_type;
1833 using std::endl;
1834
1835 // This preserves current behavior of Map.
1836 if (vl < Teuchos::VERB_HIGH) {
1837 return std::string ();
1838 }
1839 auto outStringP = Teuchos::rcp (new std::ostringstream ());
1840 Teuchos::RCP<Teuchos::FancyOStream> outp =
1841 Teuchos::getFancyOStream (outStringP);
1842 Teuchos::FancyOStream& out = *outp;
1843
1844 auto comm = this->getComm ();
1845 const int myRank = comm->getRank ();
1846 const int numProcs = comm->getSize ();
1847 out << "Process " << myRank << " of " << numProcs << ":" << endl;
1848 Teuchos::OSTab tab1 (out);
1849
1850 const LO numEnt = static_cast<LO> (this->getLocalNumElements ());
1851 out << "My number of entries: " << numEnt << endl
1852 << "My minimum global index: " << this->getMinGlobalIndex () << endl
1853 << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1854
1855 if (vl == Teuchos::VERB_EXTREME) {
1856 out << "My global indices: [";
1857 const LO minLclInd = this->getMinLocalIndex ();
1858 for (LO k = 0; k < numEnt; ++k) {
1859 out << minLclInd + this->getGlobalElement (k);
1860 if (k + 1 < numEnt) {
1861 out << ", ";
1862 }
1863 }
1864 out << "]" << endl;
1865 }
1866
1867 out.flush (); // make sure the ostringstream got everything
1868 return outStringP->str ();
1869 }
1870
1871 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1872 void
1874 describe (Teuchos::FancyOStream &out,
1875 const Teuchos::EVerbosityLevel verbLevel) const
1876 {
1877 using Teuchos::TypeNameTraits;
1878 using Teuchos::VERB_DEFAULT;
1879 using Teuchos::VERB_NONE;
1880 using Teuchos::VERB_LOW;
1881 using Teuchos::VERB_HIGH;
1882 using std::endl;
1883 using LO = local_ordinal_type;
1884 using GO = global_ordinal_type;
1885 const Teuchos::EVerbosityLevel vl =
1886 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1887
1888 if (vl == VERB_NONE) {
1889 return; // don't print anything
1890 }
1891 // If this Map's Comm is null, then the Map does not participate
1892 // in collective operations with the other processes. In that
1893 // case, it is not even legal to call this method. The reasonable
1894 // thing to do in that case is nothing.
1895 auto comm = this->getComm ();
1896 if (comm.is_null ()) {
1897 return;
1898 }
1899 const int myRank = comm->getRank ();
1900 const int numProcs = comm->getSize ();
1901
1902 // Only Process 0 should touch the output stream, but this method
1903 // in general may need to do communication. Thus, we may need to
1904 // preserve the current tab level across multiple "if (myRank ==
1905 // 0) { ... }" inner scopes. This is why we sometimes create
1906 // OSTab instances by pointer, instead of by value. We only need
1907 // to create them by pointer if the tab level must persist through
1908 // multiple inner scopes.
1909 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1910
1911 if (myRank == 0) {
1912 // At every verbosity level but VERB_NONE, Process 0 prints.
1913 // By convention, describe() always begins with a tab before
1914 // printing.
1915 tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1916 out << "\"Tpetra::Map\":" << endl;
1917 tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1918 {
1919 out << "Template parameters:" << endl;
1920 Teuchos::OSTab tab2 (out);
1921 out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1922 << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1923 << "Node: " << TypeNameTraits<Node>::name () << endl;
1924 }
1925 const std::string label = this->getObjectLabel ();
1926 if (label != "") {
1927 out << "Label: \"" << label << "\"" << endl;
1928 }
1929 out << "Global number of entries: " << getGlobalNumElements () << endl
1930 << "Minimum global index: " << getMinAllGlobalIndex () << endl
1931 << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1932 << "Index base: " << getIndexBase () << endl
1933 << "Number of processes: " << numProcs << endl
1934 << "Uniform: " << (isUniform () ? "true" : "false") << endl
1935 << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1936 << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1937 }
1938
1939 // This is collective over the Map's communicator.
1940 if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1941 const std::string lclStr = this->localDescribeToString (vl);
1942 Tpetra::Details::gathervPrint (out, lclStr, *comm);
1943 }
1944 }
1945
1946 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1947 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1949 replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1950 {
1951 using Teuchos::RCP;
1952 using Teuchos::rcp;
1953 using GST = global_size_t;
1954 using LO = local_ordinal_type;
1955 using GO = global_ordinal_type;
1957
1958 // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1959 // the Map by calling its ordinary public constructor, using the
1960 // original Map's data. This only involves O(1) all-reduces over
1961 // the new communicator, which in the common case only includes a
1962 // small number of processes.
1963
1964 // Create the Map to return.
1965 if (newComm.is_null () || newComm->getSize () < 1) {
1966 return Teuchos::null; // my process does not participate in the new Map
1967 }
1968 else if (newComm->getSize () == 1) {
1969 lazyPushToHost();
1970
1971 // The case where the new communicator has only one process is
1972 // easy. We don't have to communicate to get all the
1973 // information we need. Use the default comm to create the new
1974 // Map, then fill in all the fields directly.
1975 RCP<map_type> newMap (new map_type ());
1976
1977 newMap->comm_ = newComm;
1978 // mfh 07 Oct 2016: Preserve original behavior, even though the
1979 // original index base may no longer be the globally min global
1980 // index. See #616 for why this doesn't matter so much anymore.
1981 newMap->indexBase_ = this->indexBase_;
1982 newMap->numGlobalElements_ = this->numLocalElements_;
1983 newMap->numLocalElements_ = this->numLocalElements_;
1984 newMap->minMyGID_ = this->minMyGID_;
1985 newMap->maxMyGID_ = this->maxMyGID_;
1986 newMap->minAllGID_ = this->minMyGID_;
1987 newMap->maxAllGID_ = this->maxMyGID_;
1988 newMap->firstContiguousGID_ = this->firstContiguousGID_;
1989 newMap->lastContiguousGID_ = this->lastContiguousGID_;
1990 // Since the new communicator has only one process, neither
1991 // uniformity nor contiguity have changed.
1992 newMap->uniform_ = this->uniform_;
1993 newMap->contiguous_ = this->contiguous_;
1994 // The new communicator only has one process, so the new Map is
1995 // not distributed.
1996 newMap->distributed_ = false;
1997 newMap->lgMap_ = this->lgMap_;
1998 newMap->lgMapHost_ = this->lgMapHost_;
1999 newMap->glMap_ = this->glMap_;
2000 newMap->glMapHost_ = this->glMapHost_;
2001 // It's OK not to initialize the new Map's Directory.
2002 // This is initialized lazily, on first call to getRemoteIndexList.
2003
2004 return newMap;
2005 }
2006 else { // newComm->getSize() != 1
2007 // Even if the original Map is contiguous, the new Map might not
2008 // be, especially if the excluded processes have ranks != 0 or
2009 // newComm->getSize()-1. The common case for this method is to
2010 // exclude many (possibly even all but one) processes, so it
2011 // likely doesn't pay to do the global communication (over the
2012 // original communicator) to figure out whether we can optimize
2013 // the result Map. Thus, we just set up the result Map as
2014 // noncontiguous.
2015 //
2016 // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
2017 // the global-to-local table, etc. Optimize this code path to
2018 // avoid unnecessary local work.
2019
2020 // Make Map (re)compute the global number of elements.
2021 const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2022 // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
2023 // to use the noncontiguous Map constructor, since the new Map
2024 // might not be contiguous. Even if the old Map was contiguous,
2025 // some process in the "middle" might have been excluded. If we
2026 // want to avoid local work, we either have to do the setup by
2027 // hand, or write a new Map constructor.
2028#if 1
2029 // The disabled code here throws the following exception in
2030 // Map's replaceCommWithSubset test:
2031 //
2032 // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ())
2033 // 10:
2034 // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
2035 // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
2036
2037 auto lgMap = this->getMyGlobalIndices ();
2038 using size_type =
2039 typename std::decay<decltype (lgMap.extent (0)) >::type;
2040 const size_type lclNumInds =
2041 static_cast<size_type> (this->getLocalNumElements ());
2042 using Teuchos::TypeNameTraits;
2043 TEUCHOS_TEST_FOR_EXCEPTION
2044 (lgMap.extent (0) != lclNumInds, std::logic_error,
2045 "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2046 "has length " << lgMap.extent (0) << " (of type " <<
2047 TypeNameTraits<size_type>::name () << ") != this->getLocalNumElements()"
2048 " = " << this->getLocalNumElements () << ". The latter, upon being "
2049 "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2050 "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2051 "developers.");
2052#else
2053 Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList ();
2054#endif // 1
2055
2056 const GO indexBase = this->getIndexBase ();
2057 // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2058 auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2059 return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2060 }
2061 }
2062
2063 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2064 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2066 removeEmptyProcesses () const
2067 {
2068 using Teuchos::Comm;
2069 using Teuchos::null;
2070 using Teuchos::outArg;
2071 using Teuchos::RCP;
2072 using Teuchos::rcp;
2073 using Teuchos::REDUCE_MIN;
2074 using Teuchos::reduceAll;
2075
2076 // Create the new communicator. split() returns a valid
2077 // communicator on all processes. On processes where color == 0,
2078 // ignore the result. Passing key == 0 tells MPI to order the
2079 // processes in the new communicator by their rank in the old
2080 // communicator.
2081 const int color = (numLocalElements_ == 0) ? 0 : 1;
2082 // MPI_Comm_split must be called collectively over the original
2083 // communicator. We can't just call it on processes with color
2084 // one, even though we will ignore its result on processes with
2085 // color zero.
2086 RCP<const Comm<int> > newComm = comm_->split (color, 0);
2087 if (color == 0) {
2088 newComm = null;
2089 }
2090
2091 // Create the Map to return.
2092 if (newComm.is_null ()) {
2093 return null; // my process does not participate in the new Map
2094 } else {
2095 RCP<Map> map = rcp (new Map ());
2096
2097 map->comm_ = newComm;
2098 map->indexBase_ = indexBase_;
2099 map->numGlobalElements_ = numGlobalElements_;
2100 map->numLocalElements_ = numLocalElements_;
2101 map->minMyGID_ = minMyGID_;
2102 map->maxMyGID_ = maxMyGID_;
2103 map->minAllGID_ = minAllGID_;
2104 map->maxAllGID_ = maxAllGID_;
2105 map->firstContiguousGID_= firstContiguousGID_;
2106 map->lastContiguousGID_ = lastContiguousGID_;
2107
2108 // Uniformity and contiguity have not changed. The directory
2109 // has changed, but we've taken care of that above.
2110 map->uniform_ = uniform_;
2111 map->contiguous_ = contiguous_;
2112
2113 // If the original Map was NOT distributed, then the new Map
2114 // cannot be distributed.
2115 //
2116 // If the number of processes in the new communicator is 1, then
2117 // the new Map is not distributed.
2118 //
2119 // Otherwise, we have to check the new Map using an all-reduce
2120 // (over the new communicator). For example, the original Map
2121 // may have had some processes with zero elements, and all other
2122 // processes with the same number of elements as in the whole
2123 // Map. That Map is technically distributed, because of the
2124 // processes with zero elements. Removing those processes would
2125 // make the new Map locally replicated.
2126 if (! distributed_ || newComm->getSize () == 1) {
2127 map->distributed_ = false;
2128 } else {
2129 const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2130 int allProcsOwnAllGids = 0;
2131 reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2132 map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2133 }
2134
2135 map->lgMap_ = lgMap_;
2136 map->lgMapHost_ = lgMapHost_;
2137 map->glMap_ = glMap_;
2138 map->glMapHost_ = glMapHost_;
2139
2140 // Map's default constructor creates an uninitialized Directory.
2141 // The Directory will be initialized on demand in
2142 // getRemoteIndexList().
2143 //
2144 // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2145 // directory more efficiently than just recreating it. If
2146 // directory recreation proves a bottleneck, we can always
2147 // revisit this. On the other hand, Directory creation is only
2148 // collective over the new, presumably much smaller
2149 // communicator, so it may not be worth the effort to optimize.
2150
2151 return map;
2152 }
2153 }
2154
2155 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2156 void
2157 Map<LocalOrdinal,GlobalOrdinal,Node>::setupDirectory () const
2158 {
2159 TEUCHOS_TEST_FOR_EXCEPTION(
2160 directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2161 "The Directory is null. "
2162 "Please report this bug to the Tpetra developers.");
2163
2164 // Only create the Directory if it hasn't been created yet.
2165 // This is a collective operation.
2166 if (! directory_->initialized ()) {
2167 directory_->initialize (*this);
2168 }
2169 }
2170
2171 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2174 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2175 const Teuchos::ArrayView<int>& PIDs,
2176 const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2177 {
2178 using Tpetra::Details::OrdinalTraits;
2180 using std::endl;
2181 using size_type = Teuchos::ArrayView<int>::size_type;
2182
2183 const bool verbose = Details::Behavior::verbose("Map");
2184 const size_t maxNumToPrint = verbose ?
2186 std::unique_ptr<std::string> prefix;
2187 if (verbose) {
2188 prefix = Details::createPrefix(comm_.getRawPtr(),
2189 "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2190 std::ostringstream os;
2191 os << *prefix << "Start: ";
2192 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2193 os << endl;
2194 std::cerr << os.str();
2195 }
2196
2197 // Empty Maps (i.e., containing no indices on any processes in the
2198 // Map's communicator) are perfectly valid. In that case, if the
2199 // input GID list is nonempty, we fill the output arrays with
2200 // invalid values, and return IDNotPresent to notify the caller.
2201 // It's perfectly valid to give getRemoteIndexList GIDs that the
2202 // Map doesn't own. SubmapImport test 2 needs this functionality.
2203 if (getGlobalNumElements () == 0) {
2204 if (GIDs.size () == 0) {
2205 if (verbose) {
2206 std::ostringstream os;
2207 os << *prefix << "Done; both Map & input are empty" << endl;
2208 std::cerr << os.str();
2209 }
2210 return AllIDsPresent; // trivially
2211 }
2212 else {
2213 if (verbose) {
2214 std::ostringstream os;
2215 os << *prefix << "Done: Map is empty on all processes, "
2216 "so all output PIDs & LIDs are invalid (-1)." << endl;
2217 std::cerr << os.str();
2218 }
2219 for (size_type k = 0; k < PIDs.size (); ++k) {
2220 PIDs[k] = OrdinalTraits<int>::invalid ();
2221 }
2222 for (size_type k = 0; k < LIDs.size (); ++k) {
2223 LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2224 }
2225 return IDNotPresent;
2226 }
2227 }
2228
2229 // getRemoteIndexList must be called collectively, and Directory
2230 // initialization is collective too, so it's OK to initialize the
2231 // Directory on demand.
2232
2233 if (verbose) {
2234 std::ostringstream os;
2235 os << *prefix << "Call setupDirectory" << endl;
2236 std::cerr << os.str();
2237 }
2238 setupDirectory();
2239 if (verbose) {
2240 std::ostringstream os;
2241 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2242 std::cerr << os.str();
2243 }
2244 const Tpetra::LookupStatus retVal =
2245 directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2246 if (verbose) {
2247 std::ostringstream os;
2248 os << *prefix << "Done; getDirectoryEntries returned "
2249 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2250 << "; ";
2251 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2252 os << ", ";
2253 verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2254 os << endl;
2255 std::cerr << os.str();
2256 }
2257 return retVal;
2258 }
2259
2260 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2263 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2264 const Teuchos::ArrayView<int> & PIDs) const
2265 {
2267 using std::endl;
2268
2269 const bool verbose = Details::Behavior::verbose("Map");
2270 const size_t maxNumToPrint = verbose ?
2272 std::unique_ptr<std::string> prefix;
2273 if (verbose) {
2274 prefix = Details::createPrefix(comm_.getRawPtr(),
2275 "Map", "getRemoteIndexList(GIDs,PIDs)");
2276 std::ostringstream os;
2277 os << *prefix << "Start: ";
2278 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2279 os << endl;
2280 std::cerr << os.str();
2281 }
2282
2283 if (getGlobalNumElements () == 0) {
2284 if (GIDs.size () == 0) {
2285 if (verbose) {
2286 std::ostringstream os;
2287 os << *prefix << "Done; both Map & input are empty" << endl;
2288 std::cerr << os.str();
2289 }
2290 return AllIDsPresent; // trivially
2291 }
2292 else {
2293 if (verbose) {
2294 std::ostringstream os;
2295 os << *prefix << "Done: Map is empty on all processes, "
2296 "so all output PIDs are invalid (-1)." << endl;
2297 std::cerr << os.str();
2298 }
2299 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2300 PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2301 }
2302 return IDNotPresent;
2303 }
2304 }
2305
2306 // getRemoteIndexList must be called collectively, and Directory
2307 // initialization is collective too, so it's OK to initialize the
2308 // Directory on demand.
2309
2310 if (verbose) {
2311 std::ostringstream os;
2312 os << *prefix << "Call setupDirectory" << endl;
2313 std::cerr << os.str();
2314 }
2315 setupDirectory();
2316 if (verbose) {
2317 std::ostringstream os;
2318 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2319 std::cerr << os.str();
2320 }
2321 const Tpetra::LookupStatus retVal =
2322 directory_->getDirectoryEntries(*this, GIDs, PIDs);
2323 if (verbose) {
2324 std::ostringstream os;
2325 os << *prefix << "Done; getDirectoryEntries returned "
2326 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2327 << "; ";
2328 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2329 os << endl;
2330 std::cerr << os.str();
2331 }
2332 return retVal;
2333 }
2334
2335 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2336 void
2337 Map<LocalOrdinal,GlobalOrdinal,Node>::lazyPushToHost() const{
2338 using exec_space = typename Node::device_type::execution_space;
2339 if(lgMap_.extent(0) != lgMapHost_.extent(0)) {
2340 Tpetra::Details::ProfilingRegion pr("Map::lazyPushToHost() - pushing data");
2341 // NOTE: We check lgMap_ and not glMap_, since the latter can
2342 // be somewhat error prone for contiguous maps
2343
2344 // create_mirror_view preserves const-ness. create_mirror does not
2345 auto lgMap_host = Kokkos::create_mirror(Kokkos::HostSpace (), lgMap_);
2346
2347 // Since this was computed on the default stream, we can copy on the stream and then fence
2348 // the stream
2349 Kokkos::deep_copy(exec_space(),lgMap_host,lgMap_);
2350 exec_space().fence();
2351 lgMapHost_ = lgMap_host;
2352
2353 // Make host version - when memory spaces match these just do trivial assignment
2354 glMapHost_ = global_to_local_table_host_type(glMap_);
2355 }
2356 }
2357
2358
2359 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2360 Teuchos::RCP<const Teuchos::Comm<int> >
2362 return comm_;
2363 }
2364
2365 template <class LocalOrdinal,class GlobalOrdinal, class Node>
2366 bool
2368 checkIsDist() const
2369 {
2370 using Teuchos::as;
2371 using Teuchos::outArg;
2372 using Teuchos::REDUCE_MIN;
2373 using Teuchos::reduceAll;
2374 using std::endl;
2375
2376 const bool verbose = Details::Behavior::verbose("Map");
2377 std::unique_ptr<std::string> prefix;
2378 if (verbose) {
2379 prefix = Details::createPrefix(
2380 comm_.getRawPtr(), "Map", "checkIsDist");
2381 std::ostringstream os;
2382 os << *prefix << "Start" << endl;
2383 std::cerr << os.str();
2384 }
2385
2386 bool global = false;
2387 if (comm_->getSize () > 1) {
2388 // The communicator has more than one process, but that doesn't
2389 // necessarily mean the Map is distributed.
2390 int localRep = 0;
2391 if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2392 // The number of local elements on this process equals the
2393 // number of global elements.
2394 //
2395 // NOTE (mfh 22 Nov 2011) Does this still work if there were
2396 // duplicates in the global ID list on input (the third Map
2397 // constructor), so that the number of local elements (which
2398 // are not duplicated) on this process could be less than the
2399 // number of global elements, even if this process owns all
2400 // the elements?
2401 localRep = 1;
2402 }
2403 int allLocalRep;
2404 reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2405 if (allLocalRep != 1) {
2406 // At least one process does not own all the elements.
2407 // This makes the Map a distributed Map.
2408 global = true;
2409 }
2410 }
2411 // If the communicator has only one process, then the Map is not
2412 // distributed.
2413
2414 if (verbose) {
2415 std::ostringstream os;
2416 os << *prefix << "Done; global=" << (global ? "true" : "false")
2417 << endl;
2418 std::cerr << os.str();
2419 }
2420 return global;
2421 }
2422
2423} // namespace Tpetra
2424
2425template <class LocalOrdinal, class GlobalOrdinal>
2426Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2427Tpetra::createLocalMap (const size_t numElements,
2428 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2429{
2430 typedef LocalOrdinal LO;
2431 typedef GlobalOrdinal GO;
2432 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2433 return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2434}
2435
2436template <class LocalOrdinal, class GlobalOrdinal>
2437Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2438Tpetra::createUniformContigMap (const global_size_t numElements,
2439 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2440{
2441 typedef LocalOrdinal LO;
2442 typedef GlobalOrdinal GO;
2443 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2444 return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2445}
2446
2447template <class LocalOrdinal, class GlobalOrdinal, class Node>
2448Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2449Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2450 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2451)
2452{
2453 using Teuchos::rcp;
2455 const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2456
2457 return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2458}
2459
2460template <class LocalOrdinal, class GlobalOrdinal, class Node>
2461Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2462Tpetra::createLocalMapWithNode (const size_t numElements,
2463 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2464)
2465{
2467 using Teuchos::rcp;
2469 const GlobalOrdinal indexBase = 0;
2470 const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2471
2472 return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2473}
2474
2475template <class LocalOrdinal, class GlobalOrdinal, class Node>
2476Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2478 const size_t localNumElements,
2479 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2480)
2481{
2482 using Teuchos::rcp;
2484 const GlobalOrdinal indexBase = 0;
2485
2486 return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2487}
2488
2489template <class LocalOrdinal, class GlobalOrdinal>
2490Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2492 const size_t localNumElements,
2493 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2494{
2495 typedef LocalOrdinal LO;
2496 typedef GlobalOrdinal GO;
2497 using NT = typename Tpetra::Map<LO, GO>::node_type;
2498
2499 return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2500}
2501
2502template <class LocalOrdinal, class GlobalOrdinal>
2503Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2504Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2505 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2506{
2507 typedef LocalOrdinal LO;
2508 typedef GlobalOrdinal GO;
2509 using NT = typename Tpetra::Map<LO, GO>::node_type;
2510
2511 return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2512}
2513
2514template <class LocalOrdinal, class GlobalOrdinal, class Node>
2515Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2516Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2517 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2518)
2519{
2520 using Teuchos::rcp;
2522 using GST = Tpetra::global_size_t;
2523 const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2524 // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2525 // shouldn't be zero, given that the index base is supposed to equal
2526 // the globally min global index?
2527 const GlobalOrdinal indexBase = 0;
2528
2529 return rcp (new map_type (INV, elementList, indexBase, comm));
2530}
2531
2532template<class LO, class GO, class NT>
2533Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2534Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2535{
2536 using Details::verbosePrintArray;
2537 using Teuchos::Array;
2538 using Teuchos::ArrayView;
2539 using Teuchos::as;
2540 using Teuchos::rcp;
2541 using std::cerr;
2542 using std::endl;
2543 using map_type = Tpetra::Map<LO, GO, NT>;
2544 using GST = global_size_t;
2545
2546 const bool verbose = Details::Behavior::verbose("Map");
2547 std::unique_ptr<std::string> prefix;
2548 if (verbose) {
2549 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2550 prefix = Details::createPrefix(
2551 comm.getRawPtr(), "createOneToOne(Map)");
2552 std::ostringstream os;
2553 os << *prefix << "Start" << endl;
2554 cerr << os.str();
2555 }
2556 const size_t maxNumToPrint = verbose ?
2557 Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2558 const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2559 const int myRank = M->getComm ()->getRank ();
2560
2561 // Bypasses for special cases where either M is known to be
2562 // one-to-one, or the one-to-one version of M is easy to compute.
2563 // This is why we take M as an RCP, not as a const reference -- so
2564 // that we can return M itself if it is 1-to-1.
2565 if (! M->isDistributed ()) {
2566 // For a locally replicated Map, we assume that users want to push
2567 // all the GIDs to Process 0.
2568
2569 // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2570 // you think it should return, in this special case of a locally
2571 // replicated contiguous Map.
2572 const GST numGlobalEntries = M->getGlobalNumElements ();
2573 if (M->isContiguous()) {
2574 const size_t numLocalEntries =
2575 (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2576 if (verbose) {
2577 std::ostringstream os;
2578 os << *prefix << "Input is locally replicated & contiguous; "
2579 "numLocalEntries=" << numLocalEntries << endl;
2580 cerr << os.str ();
2581 }
2582 auto retMap =
2583 rcp(new map_type(numGlobalEntries, numLocalEntries,
2584 M->getIndexBase(), M->getComm()));
2585 if (verbose) {
2586 std::ostringstream os;
2587 os << *prefix << "Done" << endl;
2588 cerr << os.str ();
2589 }
2590 return retMap;
2591 }
2592 else {
2593 if (verbose) {
2594 std::ostringstream os;
2595 os << *prefix << "Input is locally replicated & noncontiguous"
2596 << endl;
2597 cerr << os.str ();
2598 }
2599 ArrayView<const GO> myGids =
2600 (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2601 auto retMap =
2602 rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2603 M->getComm()));
2604 if (verbose) {
2605 std::ostringstream os;
2606 os << *prefix << "Done" << endl;
2607 cerr << os.str ();
2608 }
2609 return retMap;
2610 }
2611 }
2612 else if (M->isContiguous ()) {
2613 if (verbose) {
2614 std::ostringstream os;
2615 os << *prefix << "Input is distributed & contiguous" << endl;
2616 cerr << os.str ();
2617 }
2618 // Contiguous, distributed Maps are one-to-one by construction.
2619 // (Locally replicated Maps can be contiguous.)
2620 return M;
2621 }
2622 else {
2623 if (verbose) {
2624 std::ostringstream os;
2625 os << *prefix << "Input is distributed & noncontiguous" << endl;
2626 cerr << os.str ();
2627 }
2629 const size_t numMyElems = M->getLocalNumElements ();
2630 ArrayView<const GO> myElems = M->getLocalElementList ();
2631 Array<int> owner_procs_vec (numMyElems);
2632
2633 if (verbose) {
2634 std::ostringstream os;
2635 os << *prefix << "Call Directory::getDirectoryEntries: ";
2636 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2637 os << endl;
2638 cerr << os.str();
2639 }
2640 directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2641 if (verbose) {
2642 std::ostringstream os;
2643 os << *prefix << "getDirectoryEntries result: ";
2644 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2645 os << endl;
2646 cerr << os.str();
2647 }
2648
2649 Array<GO> myOwned_vec (numMyElems);
2650 size_t numMyOwnedElems = 0;
2651 for (size_t i = 0; i < numMyElems; ++i) {
2652 const GO GID = myElems[i];
2653 const int owner = owner_procs_vec[i];
2654
2655 if (myRank == owner) {
2656 myOwned_vec[numMyOwnedElems++] = GID;
2657 }
2658 }
2659 myOwned_vec.resize (numMyOwnedElems);
2660
2661 if (verbose) {
2662 std::ostringstream os;
2663 os << *prefix << "Create Map: ";
2664 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2665 os << endl;
2666 cerr << os.str();
2667 }
2668 auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2669 M->getIndexBase(), M->getComm()));
2670 if (verbose) {
2671 std::ostringstream os;
2672 os << *prefix << "Done" << endl;
2673 cerr << os.str();
2674 }
2675 return retMap;
2676 }
2677}
2678
2679template<class LocalOrdinal, class GlobalOrdinal, class Node>
2680Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2683{
2684 using Details::Behavior;
2685 using Details::verbosePrintArray;
2686 using Teuchos::Array;
2687 using Teuchos::ArrayView;
2688 using Teuchos::RCP;
2689 using Teuchos::rcp;
2690 using Teuchos::toString;
2691 using std::cerr;
2692 using std::endl;
2693 using LO = LocalOrdinal;
2694 using GO = GlobalOrdinal;
2695 using map_type = Tpetra::Map<LO, GO, Node>;
2696
2697 const bool verbose = Behavior::verbose("Map");
2698 std::unique_ptr<std::string> prefix;
2699 if (verbose) {
2700 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2701 prefix = Details::createPrefix(
2702 comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2703 std::ostringstream os;
2704 os << *prefix << "Start" << endl;
2705 cerr << os.str();
2706 }
2707 const size_t maxNumToPrint = verbose ?
2708 Behavior::verbosePrintCountThreshold() : size_t(0);
2709
2710 // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2711 // Maps (which are 1-to-1 by construction).
2712
2714 if (verbose) {
2715 std::ostringstream os;
2716 os << *prefix << "Initialize Directory" << endl;
2717 cerr << os.str ();
2718 }
2719 directory.initialize (*M, tie_break);
2720 if (verbose) {
2721 std::ostringstream os;
2722 os << *prefix << "Done initializing Directory" << endl;
2723 cerr << os.str ();
2724 }
2725 size_t numMyElems = M->getLocalNumElements ();
2726 ArrayView<const GO> myElems = M->getLocalElementList ();
2727 Array<int> owner_procs_vec (numMyElems);
2728 if (verbose) {
2729 std::ostringstream os;
2730 os << *prefix << "Call Directory::getDirectoryEntries: ";
2731 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2732 os << endl;
2733 cerr << os.str();
2734 }
2735 directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2736 if (verbose) {
2737 std::ostringstream os;
2738 os << *prefix << "getDirectoryEntries result: ";
2739 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2740 os << endl;
2741 cerr << os.str();
2742 }
2743
2744 const int myRank = M->getComm()->getRank();
2745 Array<GO> myOwned_vec (numMyElems);
2746 size_t numMyOwnedElems = 0;
2747 for (size_t i = 0; i < numMyElems; ++i) {
2748 const GO GID = myElems[i];
2749 const int owner = owner_procs_vec[i];
2750 if (myRank == owner) {
2751 myOwned_vec[numMyOwnedElems++] = GID;
2752 }
2753 }
2754 myOwned_vec.resize (numMyOwnedElems);
2755
2756 // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2757 // valid for the new Map. Why can't we reuse it?
2758 const global_size_t GINV =
2759 Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2760 if (verbose) {
2761 std::ostringstream os;
2762 os << *prefix << "Create Map: ";
2763 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2764 os << endl;
2765 cerr << os.str();
2766 }
2767 RCP<const map_type> retMap
2768 (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2769 M->getComm ()));
2770 if (verbose) {
2771 std::ostringstream os;
2772 os << *prefix << "Done" << endl;
2773 cerr << os.str();
2774 }
2775 return retMap;
2776}
2777
2778//
2779// Explicit instantiation macro
2780//
2781// Must be expanded from within the Tpetra namespace!
2782//
2783
2785
2786#define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2787 \
2788 template class Map< LO , GO , NODE >; \
2789 \
2790 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2791 createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2792 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2793 \
2794 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2795 createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2796 const size_t localNumElements, \
2797 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2798 \
2799 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2800 createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2801 const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2802 \
2803 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2804 createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2805 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2806 \
2807 template Teuchos::RCP<const Map<LO,GO,NODE> > \
2808 createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2809 \
2810 template Teuchos::RCP<const Map<LO,GO,NODE> > \
2811 createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2812 const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2813
2814
2816#define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2817 template Teuchos::RCP< const Map<LO,GO> > \
2818 createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2819 \
2820 template Teuchos::RCP< const Map<LO,GO> > \
2821 createContigMap<LO,GO>( global_size_t, size_t, \
2822 const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2823 \
2824 template Teuchos::RCP< const Map<LO,GO> > \
2825 createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2826 const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2827 \
2828 template Teuchos::RCP< const Map<LO,GO> > \
2829 createUniformContigMap<LO,GO>( const global_size_t, \
2830 const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2831
2832#endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static void reject_unrecognized_env_vars()
Search the environment for TPETRA_ variables and reject unrecognized ones.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
void initialize(const map_type &map)
Initialize the Directory with its Map.
A parallel distribution of indices over processes.
::Tpetra::Details::LocalMap< local_ordinal_type, global_ordinal_type, device_type > local_map_type
Type of the "local" Map.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
Map()
Default constructor (that does nothing).
GlobalOrdinal global_ordinal_type
The type of global indices.
Map(const global_size_t numGlobalElements, const global_ordinal_type indexBase, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const LocalGlobal lg=GloballyDistributed)
Constructor with contiguous uniform distribution.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LocalOrdinal local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map's device.
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void printOnce(std::ostream &out, const std::string &s, const Teuchos::Comm< int > *comm)
Print on one process of the given communicator, or at least try to do so (if MPI is not initialized).
bool teuchosCommIsAnMpiComm(const Teuchos::Comm< int > &)
Is the given Comm a Teuchos::MpiComm<int> instance?
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool mpiIsInitialized()
Has MPI_Init been called (on this process)?
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
bool mpiIsFinalized()
Has MPI_Finalize been called (on this process)?
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...