Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DIRECTORYIMPL_DEF_HPP
11#define TPETRA_DIRECTORYIMPL_DEF_HPP
12
15
16#include "Tpetra_Distributor.hpp"
17#include "Tpetra_Map.hpp"
18#include "Tpetra_TieBreak.hpp"
19#include "Tpetra_Util.hpp"
20#include "Tpetra_Details_FixedHashTable.hpp"
21#include "Teuchos_Comm.hpp"
22#include <memory>
23#include <sstream>
24
25// FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
26#ifdef HAVE_TPETRACORE_MPI
27# include <mpi.h>
28#endif // HAVE_TPETRACORE_MPI
29// FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
30
31namespace Tpetra {
32 namespace Details {
33 template<class LO, class GO, class NT>
36 getEntries (const map_type& map,
37 const Teuchos::ArrayView<const GO> &globalIDs,
38 const Teuchos::ArrayView<int> &nodeIDs,
39 const Teuchos::ArrayView<LO> &localIDs,
40 const bool computeLIDs) const
41 {
42 // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
43 // all have the same size, before modifying any output arguments.
44 TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
45 std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
46 "Output arrays do not have the right sizes. nodeIDs.size() = "
47 << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
48 << ".");
49 TEUCHOS_TEST_FOR_EXCEPTION(
50 computeLIDs && localIDs.size() != globalIDs.size(),
51 std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
52 "Output array do not have the right sizes. localIDs.size() = "
53 << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
54 << ".");
55
56 // Initially, fill nodeIDs and localIDs (if applicable) with
57 // invalid values. The "invalid" process ID is -1 (this means
58 // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
59 // "invalid" process ID); the invalid local ID comes from
60 // OrdinalTraits.
61 std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
62 if (computeLIDs) {
63 std::fill (localIDs.begin(), localIDs.end(),
64 Teuchos::OrdinalTraits<LO>::invalid ());
65 }
66 // Actually do the work.
67 return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
68 }
69
70
71 template<class LO, class GO, class NT>
73 ReplicatedDirectory (const map_type& map) :
74 numProcs_ (map.getComm ()->getSize ())
75 {}
76
77
78 template<class LO, class GO, class NT>
79 bool
81 isOneToOne (const Teuchos::Comm<int>& /* comm */) const
82 {
83 // A locally replicated Map is one-to-one only if there is no
84 // replication, that is, only if the Map's communicator only has
85 // one process.
86 return (numProcs_ == 1);
87 }
88
89
90 template<class LO, class GO, class NT>
91 std::string
93 {
94 std::ostringstream os;
95 os << "ReplicatedDirectory"
96 << "<" << Teuchos::TypeNameTraits<LO>::name ()
97 << ", " << Teuchos::TypeNameTraits<GO>::name ()
98 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
99 return os.str ();
100 }
101
102
103 template<class LO, class GO, class NT>
106 {
107 TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
108 Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
109 TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
110 Teuchos::typeName (*this) << " constructor: Map is not uniform.");
111 }
112
113
114 template<class LO, class GO, class NT>
115 std::string
117 {
118 std::ostringstream os;
119 os << "ContiguousUniformDirectory"
120 << "<" << Teuchos::TypeNameTraits<LO>::name ()
121 << ", " << Teuchos::TypeNameTraits<GO>::name ()
122 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
123 return os.str ();
124 }
125
126
127 template<class LO, class GO, class NT>
130 getEntriesImpl (const map_type& map,
131 const Teuchos::ArrayView<const GO> &globalIDs,
132 const Teuchos::ArrayView<int> &nodeIDs,
133 const Teuchos::ArrayView<LO> &localIDs,
134 const bool computeLIDs) const
135 {
136 using Teuchos::Comm;
137 using Teuchos::RCP;
138 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
139 const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
141
142 RCP<const Comm<int> > comm = map.getComm ();
143 const GO g_min = map.getMinAllGlobalIndex ();
144
145 // Let N_G be the global number of elements in the Map,
146 // and P be the number of processes in its communicator.
147 // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
148 //
149 // The first R processes own N_L+1 elements.
150 // The remaining P-R processes own N_L elements.
151 //
152 // Let g be the current GID, g_min be the global minimum GID,
153 // and g_0 = g - g_min. If g is a valid GID in this Map, then
154 // g_0 is in [0, N_G - 1].
155 //
156 // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
157 // the rank of the process that owns g is floor(g_0 / (N_L +
158 // 1)), and its corresponding local index on that process is g_0
159 // mod (N_L + 1).
160 //
161 // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
162 // and g_0 >= R*(N_L + 1), then the rank of the process that
163 // owns g is then R + floor(g_R / N_L), and its corresponding
164 // local index on that process is g_R mod N_L.
165
166 const size_type N_G =
167 static_cast<size_type> (map.getGlobalNumElements ());
168 const size_type P = static_cast<size_type> (comm->getSize ());
169 const size_type N_L = N_G / P;
170 const size_type R = N_G - N_L * P; // N_G mod P
171 const size_type N_R = R * (N_L + static_cast<size_type> (1));
172
173#ifdef HAVE_TPETRA_DEBUG
174 TEUCHOS_TEST_FOR_EXCEPTION(
175 N_G != P*N_L + R, std::logic_error,
176 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
177 "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
178 << " = " << P*N_L + R << ". "
179 "Please report this bug to the Tpetra developers.");
180#endif // HAVE_TPETRA_DEBUG
181
182 const size_type numGids = globalIDs.size (); // for const loop bound
183 // Avoid signed/unsigned comparisons below, in case GO is
184 // unsigned. (Integer literals are generally signed.)
185 const GO ONE = static_cast<GO> (1);
186
187 if (computeLIDs) {
188 for (size_type k = 0; k < numGids; ++k) {
189 const GO g_0 = globalIDs[k] - g_min;
190
191 // The first test is a little strange just in case GO is
192 // unsigned. Compilers raise a warning on tests like "x <
193 // 0" if x is unsigned, but don't usually raise a warning if
194 // the expression is a bit more complicated than that.
195 if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
196 nodeIDs[k] = -1;
197 localIDs[k] = invalidLid;
198 res = IDNotPresent;
199 }
200 else if (g_0 < static_cast<GO> (N_R)) {
201 // The GID comes from the initial sequence of R processes.
202 nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
203 localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
204 }
205 else if (g_0 >= static_cast<GO> (N_R)) {
206 // The GID comes from the remaining P-R processes.
207 const GO g_R = g_0 - static_cast<GO> (N_R);
208 nodeIDs[k] = static_cast<int> (R + g_R / N_L);
209 localIDs[k] = static_cast<int> (g_R % N_L);
210 }
211#ifdef HAVE_TPETRA_DEBUG
212 else {
213 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
214 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
215 "should never get here. "
216 "Please report this bug to the Tpetra developers.");
217 }
218#endif // HAVE_TPETRA_DEBUG
219 }
220 }
221 else { // don't compute local indices
222 for (size_type k = 0; k < numGids; ++k) {
223 const GO g_0 = globalIDs[k] - g_min;
224 // The first test is a little strange just in case GO is
225 // unsigned. Compilers raise a warning on tests like "x <
226 // 0" if x is unsigned, but don't usually raise a warning if
227 // the expression is a bit more complicated than that.
228 if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
229 nodeIDs[k] = -1;
230 res = IDNotPresent;
231 }
232 else if (g_0 < static_cast<GO> (N_R)) {
233 // The GID comes from the initial sequence of R processes.
234 nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
235 }
236 else if (g_0 >= static_cast<GO> (N_R)) {
237 // The GID comes from the remaining P-R processes.
238 const GO g_R = g_0 - static_cast<GO> (N_R);
239 nodeIDs[k] = static_cast<int> (R + g_R / N_L);
240 }
241#ifdef HAVE_TPETRA_DEBUG
242 else {
243 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
244 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
245 "should never get here. "
246 "Please report this bug to the Tpetra developers.");
247 }
248#endif // HAVE_TPETRA_DEBUG
249 }
250 }
251 return res;
252 }
253
254 template<class LO, class GO, class NT>
257 {
258 using Teuchos::arcp;
259 using Teuchos::gatherAll;
260 using Teuchos::RCP;
261
262 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
263
264 TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
265 Teuchos::typeName (*this) << " constructor: Map is not distributed.");
266 TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
267 Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
268
269 const int numProcs = comm->getSize ();
270
271 // Make room for the min global ID on each process, plus one
272 // entry at the end for the "max cap."
273 allMinGIDs_ = arcp<GO> (numProcs + 1);
274 // Get my process' min global ID.
275 GO minMyGID = map.getMinGlobalIndex ();
276 // Gather all of the min global IDs into the first numProcs
277 // entries of allMinGIDs_.
278
279 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
280 //
281 // The purpose of this giant hack is that gatherAll appears to
282 // interpret the "receive count" argument differently than
283 // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
284 // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
285 // which could relate either to this, or to OpenMPI.
286#ifdef HAVE_TPETRACORE_MPI
287 MPI_Datatype rawMpiType = MPI_INT;
288 bool useRawMpi = true;
289 if (typeid (GO) == typeid (int)) {
290 rawMpiType = MPI_INT;
291 } else if (typeid (GO) == typeid (long)) {
292 rawMpiType = MPI_LONG;
293 } else {
294 useRawMpi = false;
295 }
296 if (useRawMpi) {
297 using Teuchos::rcp_dynamic_cast;
298 using Teuchos::MpiComm;
299 RCP<const MpiComm<int> > mpiComm =
300 rcp_dynamic_cast<const MpiComm<int> > (comm);
301 // It could be a SerialComm instead, even in an MPI build, so
302 // be sure to check.
303 if (! comm.is_null ()) {
304 MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
305 const int err =
306 MPI_Allgather (&minMyGID, 1, rawMpiType,
307 allMinGIDs_.getRawPtr (), 1, rawMpiType,
308 rawMpiComm);
309 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
310 "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
311 } else {
312 gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
313 }
314 } else {
315 gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
316 }
317#else // NOT HAVE_TPETRACORE_MPI
318 gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
319#endif // HAVE_TPETRACORE_MPI
320 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
321
322 //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
323
324 // Put the max cap at the end. Adding one lets us write loops
325 // over the global IDs with the usual strict less-than bound.
326 allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
327 + Teuchos::OrdinalTraits<GO>::one ();
328 }
329
330 template<class LO, class GO, class NT>
331 std::string
333 {
334 std::ostringstream os;
335 os << "DistributedContiguousDirectory"
336 << "<" << Teuchos::TypeNameTraits<LO>::name ()
337 << ", " << Teuchos::TypeNameTraits<GO>::name ()
338 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
339 return os.str ();
340 }
341
342 template<class LO, class GO, class NT>
345 getEntriesImpl (const map_type& map,
346 const Teuchos::ArrayView<const GO> &globalIDs,
347 const Teuchos::ArrayView<int> &nodeIDs,
348 const Teuchos::ArrayView<LO> &localIDs,
349 const bool computeLIDs) const
350 {
351 using Teuchos::Array;
352 using Teuchos::ArrayRCP;
353 using Teuchos::ArrayView;
354 using Teuchos::as;
355 using Teuchos::Comm;
356 using Teuchos::RCP;
357
359 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
360 const int myRank = comm->getRank ();
361
362 // Map is on one process or is locally replicated.
363 typename ArrayView<int>::iterator procIter = nodeIDs.begin();
364 typename ArrayView<LO>::iterator lidIter = localIDs.begin();
365 typename ArrayView<const GO>::iterator gidIter;
366 for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
367 if (map.isNodeGlobalElement (*gidIter)) {
368 *procIter++ = myRank;
369 if (computeLIDs) {
370 *lidIter++ = map.getLocalElement (*gidIter);
371 }
372 }
373 else {
374 // Advance the pointers, leaving these values set to invalid
375 procIter++;
376 if (computeLIDs) {
377 lidIter++;
378 }
379 res = IDNotPresent;
380 }
381 }
382 return res;
383 }
384
385 template<class LO, class GO, class NT>
388 getEntriesImpl (const map_type& map,
389 const Teuchos::ArrayView<const GO> &globalIDs,
390 const Teuchos::ArrayView<int> &nodeIDs,
391 const Teuchos::ArrayView<LO> &localIDs,
392 const bool computeLIDs) const
393 {
394 using Teuchos::Array;
395 using Teuchos::ArrayRCP;
396 using Teuchos::ArrayView;
397 using Teuchos::as;
398 using Teuchos::Comm;
399 using Teuchos::RCP;
400
401 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
402 const int numProcs = comm->getSize ();
403 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
404 const GO noGIDsOnProc = std::numeric_limits<GO>::max();
406
407 // Find the first initialized GID for search below
408 int firstProcWithGIDs;
409 for (firstProcWithGIDs = 0; firstProcWithGIDs < numProcs;
410 firstProcWithGIDs++) {
411 if (allMinGIDs_[firstProcWithGIDs] != noGIDsOnProc) break;
412 }
413
414 // If Map is empty, return invalid values for all requested lookups
415 // This case should not happen, as an empty Map is not considered
416 // Distributed.
417 if (firstProcWithGIDs == numProcs) {
418 // No entries in Map
419 res = (globalIDs.size() > 0) ? IDNotPresent : AllIDsPresent;
420 std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
421 if (computeLIDs)
422 std::fill(localIDs.begin(), localIDs.end(), LINVALID);
423 return res;
424 }
425
426 const GO one = as<GO> (1);
427 const GO nOverP = as<GO> (map.getGlobalNumElements ()
428 / as<global_size_t>(numProcs - firstProcWithGIDs));
429
430 // Map is distributed but contiguous.
431 typename ArrayView<int>::iterator procIter = nodeIDs.begin();
432 typename ArrayView<LO>::iterator lidIter = localIDs.begin();
433 typename ArrayView<const GO>::iterator gidIter;
434 for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
435 LO LID = LINVALID; // Assume not found until proven otherwise
436 int image = -1;
437 GO GID = *gidIter;
438 // Guess uniform distribution (TODO: maybe replace by a binary search)
439 // We go through all this trouble to avoid overflow and
440 // signed / unsigned casting mistakes (that were made in
441 // previous versions of this code).
442 int curRank;
443 const GO firstGuess = firstProcWithGIDs + GID / std::max(nOverP, one);
444 curRank = as<int>(std::min(firstGuess, as<GO>(numProcs - 1)));
445
446 // This while loop will stop because
447 // allMinGIDs_[np] == global num elements
448 while (allMinGIDs_[curRank] == noGIDsOnProc) curRank++;
449
450 bool badGID = false;
451 while (curRank >= firstProcWithGIDs && GID < allMinGIDs_[curRank]) {
452 curRank--;
453 while (curRank >= firstProcWithGIDs &&
454 allMinGIDs_[curRank] == noGIDsOnProc) curRank--;
455 }
456 if (curRank < firstProcWithGIDs) {
457 // GID is lower than all GIDs in map
458 badGID = true;
459 }
460 else if (curRank == numProcs) {
461 // GID is higher than all GIDs in map
462 badGID = true;
463 }
464 else {
465 // we have the lower bound; now limit from above
466 int above = curRank + 1;
467 while (allMinGIDs_[above] == noGIDsOnProc) above++;
468
469 while (GID >= allMinGIDs_[above]) {
470 curRank = above;
471 if (curRank == numProcs) {
472 // GID is higher than all GIDs in map
473 badGID = true;
474 break;
475 }
476 above++;
477 while (allMinGIDs_[above] == noGIDsOnProc) above++;
478 }
479 }
480
481 if (!badGID) {
482 image = curRank;
483 LID = as<LO> (GID - allMinGIDs_[image]);
484 }
485 else {
486 res = IDNotPresent;
487 }
488 *procIter++ = image;
489 if (computeLIDs) {
490 *lidIter++ = LID;
491 }
492 }
493 return res;
494 }
495
496 template<class LO, class GO, class NT>
499 oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
500 locallyOneToOne_ (true), // to be revised below
501 useHashTables_ (false) // to be revised below
502 {
503 initialize (map, Teuchos::null);
504 }
505
506 template<class LO, class GO, class NT>
507 DistributedNoncontiguousDirectory<LO, GO, NT>::
508 DistributedNoncontiguousDirectory (const map_type& map,
509 const tie_break_type& tie_break) :
510 oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
511 locallyOneToOne_ (true), // to be revised below
512 useHashTables_ (false) // to be revised below
513 {
514 initialize (map, Teuchos::ptrFromRef (tie_break));
515 }
516
517 template<class LO, class GO, class NT>
518 void
519 DistributedNoncontiguousDirectory<LO, GO, NT>::
520 initialize (const map_type& map,
521 Teuchos::Ptr<const tie_break_type> tie_break)
522 {
523 using Teuchos::arcp;
524 using Teuchos::Array;
525 using Teuchos::ArrayRCP;
526 using Teuchos::ArrayView;
527 using Teuchos::as;
528 using Teuchos::RCP;
529 using Teuchos::rcp;
530 using Teuchos::typeName;
531 using Teuchos::TypeNameTraits;
532 using std::cerr;
533 using std::endl;
534 typedef Array<int>::size_type size_type;
535
536 // This class' implementation of getEntriesImpl() currently
537 // encodes the following assumptions:
538 //
539 // 1. global_size_t >= GO
540 // 2. global_size_t >= int
541 // 3. global_size_t >= LO
542 //
543 // We check these assumptions here.
544 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
545 std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
546 "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
547 "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
548 << ".");
549 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
550 std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
551 "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
552 << sizeof(int) << ".");
553 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
554 std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
555 "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
556 "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
557 << ".");
558
559 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
560 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
561 const GO minAllGID = map.getMinAllGlobalIndex ();
562 const GO maxAllGID = map.getMaxAllGlobalIndex ();
563
564 // The "Directory Map" (see below) will have a range of elements
565 // from the minimum to the maximum GID of the user Map, and a
566 // minimum GID of minAllGID from the user Map. It doesn't
567 // actually have to store all those entries, though do beware of
568 // calling getLocalElementList on it (see Bug 5822).
569 const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
570
571 // We can't afford to replicate the whole directory on each
572 // process, so create the "Directory Map", a uniform contiguous
573 // Map that describes how we will distribute the directory over
574 // processes.
575 //
576 // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
577 // the index base. The index base should be separate from the
578 // minimum GID.
579 directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
580 GloballyDistributed));
581 // The number of Directory elements that my process owns.
582 const size_t dir_numMyEntries = directoryMap_->getLocalNumElements ();
583
584 // Fix for Bug 5822: If the input Map is "sparse," that is if
585 // the difference between the global min and global max GID is
586 // much larger than the global number of elements in the input
587 // Map, then it's possible that the Directory Map might have
588 // many more entries than the input Map on this process. This
589 // can cause memory scalability issues. In that case, we switch
590 // from the array-based implementation of Directory storage to
591 // the hash table - based implementation. We don't use hash
592 // tables all the time, because they are slower in the common
593 // case of a nonsparse Map.
594 //
595 // NOTE: This is a per-process decision. Some processes may use
596 // array-based storage, whereas others may use hash table -
597 // based storage.
598
599 // A hash table takes a constant factor more space, more or
600 // less, than an array. Thus, it's not worthwhile, even in
601 // terms of memory usage, always to use a hash table.
602 // Furthermore, array lookups are faster than hash table
603 // lookups, so it may be worthwhile to use an array even if it
604 // takes more space. The "sparsity threshold" governs when to
605 // switch to a hash table - based implementation.
606 const size_t inverseSparsityThreshold = 10;
607 useHashTables_ =
608 (dir_numMyEntries >= inverseSparsityThreshold * map.getLocalNumElements());
609
610 // Get list of process IDs that own the directory entries for the
611 // Map GIDs. These will be the targets of the sends that the
612 // Distributor will do.
613 const int myRank = comm->getRank ();
614 const size_t numMyEntries = map.getLocalNumElements ();
615 Array<int> sendImageIDs (numMyEntries);
616 ArrayView<const GO> myGlobalEntries = map.getLocalElementList ();
617 // An ID not present in this lookup indicates that it lies outside
618 // of the range [minAllGID,maxAllGID] (from map_). this means
619 // something is wrong with map_, our fault.
620 const LookupStatus lookupStatus =
621 directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
622 TEUCHOS_TEST_FOR_EXCEPTION(
623 lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
624 << " constructor: the Directory Map could not find out where one or "
625 "more of my Map's indices should go. The input to getRemoteIndexList "
626 "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
627 << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
628 "the following entries on the calling process " <<
629 map.getComm ()->getRank () << ": " <<
630 Teuchos::toString (map.getLocalElementList ()) << ", and has "
631 << map.getGlobalNumElements () << " total global indices in ["
632 << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
633 << "]. The Directory Map has "
634 << directoryMap_->getGlobalNumElements () << " total global indices in "
635 "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
636 directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
637 "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
638 directoryMap_->getMaxGlobalIndex () << "]. "
639 "This probably means there is a bug in Map or Directory. "
640 "Please report this bug to the Tpetra developers.");
641
642 // Initialize the distributor using the list of process IDs to
643 // which to send. We'll use the distributor to send out triples
644 // of (GID, process ID, LID). We're sending the entries to the
645 // processes that the Directory Map says should own them, which is
646 // why we called directoryMap_->getRemoteIndexList() above.
647 Distributor distor (comm);
648 const size_t numReceives = distor.createFromSends (sendImageIDs);
649
650 // NOTE (mfh 21 Mar 2012) The following code assumes that
651 // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
652 //
653 // Create and fill buffer of (GID, PID, LID) triples to send
654 // out. We pack the (GID, PID, LID) triples into a single Array
655 // of GO, casting the PID from int to GO and the LID from LO to
656 // GO as we do so.
657 //
658 // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
659 // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
660 // required, and the latter is generally the case, but we should
661 // still check for this.
662 const int packetSize = 3; // We're sending triples, so packet size is 3.
663 Kokkos::View<GO*, Kokkos::HostSpace> exportEntries("exportEntries", packetSize * numMyEntries);
664 {
665 size_t exportIndex = 0;
666 for (size_t i = 0; i < numMyEntries; ++i) {
667 exportEntries[exportIndex++] = myGlobalEntries[i];
668 exportEntries[exportIndex++] = as<GO> (myRank);
669 exportEntries[exportIndex++] = as<GO> (i);
670 }
671 }
672 // Buffer of data to receive. The Distributor figured out for
673 // us how many packets we're receiving, when we called its
674 // createFromSends() method to set up the distribution plan.
675 Kokkos::View<GO*, Kokkos::HostSpace> importElements("importElements", packetSize * distor.getTotalReceiveLength());
676
677 // Distribute the triples of (GID, process ID, LID).
678 distor.doPostsAndWaits(exportEntries, packetSize, importElements);
679
680 // Unpack the redistributed data. Both implementations of
681 // Directory storage map from an LID in the Directory Map (which
682 // is the LID of the GID to store) to either a PID or an LID in
683 // the input Map. Each "packet" (contiguous chunk of
684 // importElements) contains a triple: (GID, PID, LID).
685 if (useHashTables_) {
686 // Create the hash tables. We know exactly how many elements
687 // to expect in each hash table. FixedHashTable's constructor
688 // currently requires all the keys and values at once, so we
689 // have to extract them in temporary arrays. It may be
690 // possible to rewrite FixedHashTable to use a "start fill" /
691 // "end fill" approach that avoids the temporary arrays, but
692 // we won't try that for now.
693
694 // The constructors of Array and ArrayRCP that take a number
695 // of elements all initialize the arrays. Instead, allocate
696 // raw arrays, then hand them off to ArrayRCP, to avoid the
697 // initial unnecessary initialization without losing the
698 // benefit of exception safety (and bounds checking, in a
699 // debug build).
700 LO* tableKeysRaw = NULL;
701 LO* tableLidsRaw = NULL;
702 int* tablePidsRaw = NULL;
703 try {
704 tableKeysRaw = new LO [numReceives];
705 tableLidsRaw = new LO [numReceives];
706 tablePidsRaw = new int [numReceives];
707 } catch (...) {
708 if (tableKeysRaw != NULL) {
709 delete [] tableKeysRaw;
710 }
711 if (tableLidsRaw != NULL) {
712 delete [] tableLidsRaw;
713 }
714 if (tablePidsRaw != NULL) {
715 delete [] tablePidsRaw;
716 }
717 throw;
718 }
719 ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
720 ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
721 ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
722
723 if (tie_break.is_null ()) {
724 // Fill the temporary arrays of keys and values.
725 size_type importIndex = 0;
726 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
727 const GO curGID = importElements[importIndex++];
728 const LO curLID = directoryMap_->getLocalElement (curGID);
729 TEUCHOS_TEST_FOR_EXCEPTION(
730 curLID == LINVALID, std::logic_error,
731 Teuchos::typeName(*this) << " constructor: Incoming global index "
732 << curGID << " does not have a corresponding local index in the "
733 "Directory Map. Please report this bug to the Tpetra developers.");
734 tableKeys[i] = curLID;
735 tablePids[i] = importElements[importIndex++];
736 tableLids[i] = importElements[importIndex++];
737 }
738 // Set up the hash tables. The hash tables' constructor
739 // detects whether there are duplicates, so that we can set
740 // locallyOneToOne_.
741 lidToPidTable_ =
742 rcp (new lidToPidTable_type (tableKeys (), tablePids ()));
743 locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
744 lidToLidTable_ =
745 rcp (new lidToLidTable_type (tableKeys (), tableLids ()));
746 }
747 else { // tie_break is NOT null
748
749 // For each directory Map LID received, collect all the
750 // corresponding (PID,LID) pairs. If the input Map is not
751 // one-to-one, corresponding directory Map LIDs will have
752 // more than one pair. In that case, we will use the
753 // TieBreak object to pick exactly one pair.
754 typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
755 pair_table_type ownedPidLidPairs;
756
757 // For each directory Map LID received, collect the zero or
758 // more input Map (PID,LID) pairs into ownedPidLidPairs.
759 size_type importIndex = 0;
760 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
761 const GO curGID = importElements[importIndex++];
762 const LO dirMapLid = directoryMap_->getLocalElement (curGID);
763 TEUCHOS_TEST_FOR_EXCEPTION(
764 dirMapLid == LINVALID, std::logic_error,
765 Teuchos::typeName(*this) << " constructor: Incoming global index "
766 << curGID << " does not have a corresponding local index in the "
767 "Directory Map. Please report this bug to the Tpetra developers.");
768 tableKeys[i] = dirMapLid;
769 const int PID = importElements[importIndex++];
770 const int LID = importElements[importIndex++];
771
772 // These may change below. We fill them in just to ensure
773 // that they won't have invalid values.
774 tablePids[i] = PID;
775 tableLids[i] = LID;
776
777 // For every directory Map LID, we have to remember all
778 // (PID, LID) pairs. The TieBreak object will arbitrate
779 // between them in the loop below.
780 ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
781 }
782
783 // Use TieBreak to arbitrate between (PID,LID) pairs
784 // corresponding to each directory Map LID.
785 //
786 // FIXME (mfh 23 Mar 2014) How do I know that i is the same
787 // as the directory Map LID?
788 // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
789 // contiguous, but the directory map is. Need to set tablePids[i]
790 // and tableLids[i], so need to loop over numReceives (as that is
791 // how those arrays are allocated). FIXED
792
793 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
794 const LO dirMapLid = tableKeys[i];
795 const std::vector<std::pair<int, LO> >& pidLidList =
796 ownedPidLidPairs[dirMapLid];
797 const size_t listLen = pidLidList.size();
798 if (listLen == 0) continue; // KDD This will never happen
799 const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
800 if (listLen > 1) {
801 locallyOneToOne_ = false;
802 }
803 // If there is some (PID,LID) pair for the current input
804 // Map LID, then it makes sense to invoke the TieBreak
805 // object to arbitrate between the options. Even if
806 // there is only one (PID,LID) pair, we still want to
807 // give the TieBreak object a chance to do whatever it
808 // likes to do, in terms of side effects (e.g., track
809 // (PID,LID) pairs).
810 const size_type index =
811 static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
812 pidLidList));
813 tablePids[i] = pidLidList[index].first;
814 tableLids[i] = pidLidList[index].second;
815 }
816
817 // Set up the hash tables.
818 lidToPidTable_ =
819 rcp (new lidToPidTable_type (tableKeys (), tablePids ()));
820 lidToLidTable_ =
821 rcp (new lidToLidTable_type (tableKeys (), tableLids ()));
822 }
823 }
824 else {
825 if (tie_break.is_null ()) {
826 // Use array-based implementation of Directory storage.
827 // Allocate these arrays and fill them with invalid values,
828 // in case the input Map's GID list is sparse (i.e., does
829 // not populate all GIDs from minAllGID to maxAllGID).
830 PIDs_ = arcp<int> (dir_numMyEntries);
831 std::fill (PIDs_.begin (), PIDs_.end (), -1);
832 LIDs_ = arcp<LO> (dir_numMyEntries);
833 std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
834 // Fill in the arrays with PIDs resp. LIDs.
835 size_type importIndex = 0;
836 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
837 const GO curGID = importElements[importIndex++];
838 const LO curLID = directoryMap_->getLocalElement (curGID);
839 TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
840 Teuchos::typeName(*this) << " constructor: Incoming global index "
841 << curGID << " does not have a corresponding local index in the "
842 "Directory Map. Please report this bug to the Tpetra developers.");
843
844 // If PIDs_[curLID] is not -1, then curGID is a duplicate
845 // on the calling process, so the Directory is not locally
846 // one-to-one.
847 if (PIDs_[curLID] != -1) {
848 locallyOneToOne_ = false;
849 }
850 PIDs_[curLID] = importElements[importIndex++];
851 LIDs_[curLID] = importElements[importIndex++];
852 }
853 }
854 else {
855 PIDs_ = arcp<int> (dir_numMyEntries);
856 LIDs_ = arcp<LO> (dir_numMyEntries);
857 std::fill (PIDs_.begin (), PIDs_.end (), -1);
858
859 // All received (PID, LID) pairs go into ownedPidLidPairs.
860 // This is a map from the directory Map's LID to the (PID,
861 // LID) pair (where the latter LID comes from the input Map,
862 // not the directory Map). If the input Map is not
863 // one-to-one, corresponding LIDs will have
864 // ownedPidLidPairs[curLID].size() > 1. In that case, we
865 // will use the TieBreak object to pick exactly one pair.
866 Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
867 size_t importIndex = 0;
868 for (size_t i = 0; i < numReceives; ++i) {
869 const GO GID = importElements[importIndex++];
870 const int PID = importElements[importIndex++];
871 const LO LID = importElements[importIndex++];
872
873 const LO dirMapLid = directoryMap_->getLocalElement (GID);
874 TEUCHOS_TEST_FOR_EXCEPTION(
875 dirMapLid == LINVALID, std::logic_error,
876 Teuchos::typeName(*this) << " constructor: Incoming global index "
877 << GID << " does not have a corresponding local index in the "
878 "Directory Map. Please report this bug to the Tpetra developers.");
879 ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
880 }
881
882 // Use TieBreak to arbitrate between (PID,LID) pairs
883 // corresponding to each directory Map LID.
884 //
885 // FIXME (mfh 23 Mar 2014) How do I know that i is the same
886 // as the directory Map LID?
887 // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
888 // contiguous. Loop over all ownedPidLidPairs; skip those that have
889 // empty lists. FIXED
890
891 for (size_t i = 0; i < dir_numMyEntries; ++i) {
892 const std::vector<std::pair<int, LO> >& pidLidList =
893 ownedPidLidPairs[i];
894 const size_t listLen = pidLidList.size();
895 if (listLen == 0) continue; // KDD will happen for GIDs not in
896 // KDD the user's source map
897 const LO dirMapLid = static_cast<LO> (i);
898 const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
899 if (listLen > 1) {
900 locallyOneToOne_ = false;
901 }
902 // If there is some (PID,LID) pair for the current input
903 // Map LID, then it makes sense to invoke the TieBreak
904 // object to arbitrate between the options. Even if
905 // there is only one (PID,LID) pair, we still want to
906 // give the TieBreak object a chance to do whatever it
907 // likes to do, in terms of side effects (e.g., track
908 // (PID,LID) pairs).
909 const size_type index =
910 static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
911 pidLidList));
912 PIDs_[i] = pidLidList[index].first;
913 LIDs_[i] = pidLidList[index].second;
914 }
915 }
916 }
917 }
918
919 template<class LO, class GO, class NT>
920 std::string
922 {
923 std::ostringstream os;
924 os << "DistributedNoncontiguousDirectory"
925 << "<" << Teuchos::TypeNameTraits<LO>::name ()
926 << ", " << Teuchos::TypeNameTraits<GO>::name ()
927 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
928 return os.str ();
929 }
930
931 template<class LO, class GO, class NT>
934 getEntriesImpl (const map_type& map,
935 const Teuchos::ArrayView<const GO> &globalIDs,
936 const Teuchos::ArrayView<int> &nodeIDs,
937 const Teuchos::ArrayView<LO> &localIDs,
938 const bool computeLIDs) const
939 {
940 using Teuchos::Array;
941 using Teuchos::ArrayRCP;
942 using Teuchos::ArrayView;
943 using Teuchos::as;
944 using Teuchos::RCP;
945 using Teuchos::toString;
946 using Details::Behavior;
948 using std::cerr;
949 using std::endl;
950 using size_type = typename Array<GO>::size_type;
951 const char funcPrefix[] = "Tpetra::"
952 "DistributedNoncontiguousDirectory::getEntriesImpl: ";
953 const char errSuffix[] =
954 " Please report this bug to the Tpetra developers.";
955
956 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
957 const bool verbose = Behavior::verbose ("Directory") ||
958 Behavior::verbose ("Tpetra::Directory");
959 const size_t maxNumToPrint = verbose ?
961
962 std::unique_ptr<std::string> procPrefix;
963 if (verbose) {
964 std::ostringstream os;
965 os << "Proc ";
966 if (map.getComm ().is_null ()) {
967 os << "?";
968 }
969 else {
970 os << map.getComm ()->getRank ();
971 }
972 os << ": ";
973 procPrefix = std::unique_ptr<std::string>(
974 new std::string(os.str()));
975 os << funcPrefix << "{";
976 verbosePrintArray(os, globalIDs, "GIDs", maxNumToPrint);
977 os << ", ";
978 verbosePrintArray(os, nodeIDs, "PIDs", maxNumToPrint);
979 os << ", ";
980 verbosePrintArray(os, localIDs, "LIDs", maxNumToPrint);
981 os << ", computeLIDs: "
982 << (computeLIDs ? "true" : "false") << "}" << endl;
983 cerr << os.str ();
984 }
985
986 const size_t numEntries = globalIDs.size ();
987 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
989
990 //
991 // Set up directory structure.
992 //
993
994 // If we're computing LIDs, we also have to include them in each
995 // packet, along with the GID and process ID.
996 const int packetSize = computeLIDs ? 3 : 2;
997
998 // For data distribution, we use: Surprise! A Distributor!
999 Distributor distor (comm);
1000
1001 // Get directory locations for the requested list of entries.
1002 Array<int> dirImages (numEntries);
1003 if (verbose) {
1004 std::ostringstream os;
1005 os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
1006 << endl;
1007 cerr << os.str ();
1008 }
1009 res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
1010 if (verbose) {
1011 std::ostringstream os;
1012 os << *procPrefix << "Director Map getRemoteIndexList out ";
1013 verbosePrintArray(os, dirImages, "PIDs", maxNumToPrint);
1014 os << endl;
1015 cerr << os.str ();
1016 }
1017
1018 // Check for unfound globalIDs and set corresponding nodeIDs to -1
1019 size_t numMissing = 0;
1020 if (res == IDNotPresent) {
1021 for (size_t i=0; i < numEntries; ++i) {
1022 if (dirImages[i] == -1) {
1023 nodeIDs[i] = -1;
1024 if (computeLIDs) {
1025 localIDs[i] = LINVALID;
1026 }
1027 numMissing++;
1028 }
1029 }
1030 }
1031
1032 Array<GO> sendGIDs;
1033 Array<int> sendImages;
1034 if (verbose) {
1035 std::ostringstream os;
1036 os << *procPrefix << "Call Distributor::createFromRecvs"
1037 << endl;
1038 cerr << os.str ();
1039 }
1040 distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
1041 if (verbose) {
1042 std::ostringstream os;
1043 os << *procPrefix << "Distributor::createFromRecvs result: "
1044 << "{";
1045 verbosePrintArray(os, sendGIDs, "sendGIDs", maxNumToPrint);
1046 os << ", ";
1047 verbosePrintArray(os, sendImages, "sendPIDs", maxNumToPrint);
1048 os << "}" << endl;
1049 cerr << os.str ();
1050 }
1051 const size_type numSends = sendGIDs.size ();
1052
1053 //
1054 // mfh 13 Nov 2012:
1055 //
1056 // The code below temporarily stores LO, GO, and int values in
1057 // an array of global_size_t. If one of the signed types (LO
1058 // and GO should both be signed) happened to be -1 (or some
1059 // negative number, but -1 is the one that came up today), then
1060 // conversion to global_size_t will result in a huge
1061 // global_size_t value, and thus conversion back may overflow.
1062 // (Teuchos::as doesn't know that we meant it to be an LO or GO
1063 // all along.)
1064 //
1065 // The overflow normally would not be a problem, since it would
1066 // just go back to -1 again. However, Teuchos::as does range
1067 // checking on conversions in a debug build, so it throws an
1068 // exception (std::range_error) in this case. Range checking is
1069 // generally useful in debug mode, so we don't want to disable
1070 // this behavior globally.
1071 //
1072 // We solve this problem by forgoing use of Teuchos::as for the
1073 // conversions below from LO, GO, or int to global_size_t, and
1074 // the later conversions back from global_size_t to LO, GO, or
1075 // int.
1076 //
1077 // I've recorded this discussion as Bug 5760.
1078 //
1079
1080 // global_size_t >= GO
1081 // global_size_t >= size_t >= int
1082 // global_size_t >= size_t >= LO
1083 // Therefore, we can safely store all of these in a global_size_t
1084 Kokkos::View<global_size_t*, Kokkos::HostSpace> exports("exports", packetSize * numSends);
1085 {
1086 // Packet format:
1087 // - If computing LIDs: (GID, PID, LID)
1088 // - Otherwise: (GID, PID)
1089 //
1090 // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1091
1092 // Current position to which to write in exports array. If
1093 // sending pairs, we pack the (GID, PID) pair for gid =
1094 // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1095 // triples, we pack the (GID, PID, LID) pair for gid =
1096 // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1097 size_t exportsIndex = 0;
1098
1099 if (useHashTables_) {
1100 if (verbose) {
1101 std::ostringstream os;
1102 os << *procPrefix << "Pack exports (useHashTables_ true)"
1103 << endl;
1104 cerr << os.str ();
1105 }
1106 for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1107 const GO curGID = sendGIDs[gidIndex];
1108 // Don't use as() here (see above note).
1109 exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1110 const LO curLID = directoryMap_->getLocalElement (curGID);
1111 TEUCHOS_TEST_FOR_EXCEPTION
1112 (curLID == LINVALID, std::logic_error, funcPrefix <<
1113 "Directory Map's global index " << curGID << " lacks "
1114 "a corresponding local index." << errSuffix);
1115 // Don't use as() here (see above note).
1116 exports[exportsIndex++] =
1117 static_cast<global_size_t> (lidToPidTable_->get (curLID));
1118 if (computeLIDs) {
1119 // Don't use as() here (see above note).
1120 exports[exportsIndex++] =
1121 static_cast<global_size_t> (lidToLidTable_->get (curLID));
1122 }
1123 }
1124 }
1125 else {
1126 if (verbose) {
1127 std::ostringstream os;
1128 os << *procPrefix << "Pack exports (useHashTables_ false)"
1129 << endl;
1130 cerr << os.str ();
1131 }
1132 for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1133 const GO curGID = sendGIDs[gidIndex];
1134 // Don't use as() here (see above note).
1135 exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1136 const LO curLID = directoryMap_->getLocalElement (curGID);
1137 TEUCHOS_TEST_FOR_EXCEPTION
1138 (curLID == LINVALID, std::logic_error, funcPrefix <<
1139 "Directory Map's global index " << curGID << " lacks "
1140 "a corresponding local index." << errSuffix);
1141 // Don't use as() here (see above note).
1142 exports[exportsIndex++] =
1143 static_cast<global_size_t> (PIDs_[curLID]);
1144 if (computeLIDs) {
1145 // Don't use as() here (see above note).
1146 exports[exportsIndex++] =
1147 static_cast<global_size_t> (LIDs_[curLID]);
1148 }
1149 }
1150 }
1151
1152 TEUCHOS_TEST_FOR_EXCEPTION
1153 (exportsIndex > exports.size(), std::logic_error,
1154 funcPrefix << "On Process " << comm->getRank () << ", "
1155 "exportsIndex = " << exportsIndex << " > exports.size() = "
1156 << exports.size () << "." << errSuffix);
1157 }
1158
1159 TEUCHOS_TEST_FOR_EXCEPTION
1160 (numEntries < numMissing, std::logic_error, funcPrefix <<
1161 "On Process " << comm->getRank () << ", numEntries = " <<
1162 numEntries << " < numMissing = " << numMissing << "." <<
1163 errSuffix);
1164
1165 //
1166 // mfh 13 Nov 2012: See note above on conversions between
1167 // global_size_t and LO, GO, or int.
1168 //
1169 const size_t numRecv = numEntries - numMissing;
1170
1171 {
1172 const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1173 const size_t requiredImportLen = numRecv * packetSize;
1174 const int myRank = comm->getRank ();
1175 TEUCHOS_TEST_FOR_EXCEPTION(
1176 importLen < requiredImportLen, std::logic_error,
1177 "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1178 "On Process " << myRank << ": The 'imports' array must have length "
1179 "at least " << requiredImportLen << ", but its actual length is " <<
1180 importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1181 packetSize << ", numEntries (# GIDs): " << numEntries <<
1182 ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1183 << distor.getTotalReceiveLength () << ". " << std::endl <<
1184 "Distributor description: " << distor.description () << ". "
1185 << std::endl <<
1186 "Please report this bug to the Tpetra developers.");
1187 }
1188
1189 Kokkos::View<global_size_t*, Kokkos::HostSpace> imports("imports", packetSize * distor.getTotalReceiveLength());
1190 // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1191 // with communication, by splitting this call into doPosts and
1192 // doWaits. The code is still correct in this form, however.
1193 if (verbose) {
1194 std::ostringstream os;
1195 os << *procPrefix << "Call doPostsAndWaits: {"
1196 << "packetSize: " << packetSize << ", ";
1197 verbosePrintArray(os, exports, "exports", maxNumToPrint);
1198 os << "}" << endl;
1199 cerr << os.str ();
1200 }
1201 distor.doPostsAndWaits(exports, packetSize, imports);
1202 if (verbose) {
1203 std::ostringstream os;
1204 os << *procPrefix << "doPostsAndWaits result: ";
1205 verbosePrintArray(os, imports, "imports", maxNumToPrint);
1206 os << endl;
1207 cerr << os.str ();
1208 }
1209
1210 Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1211 Array<GO> offset (numEntries); // permutation array (sort2 output)
1212 for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1213 offset[ii] = ii;
1214 }
1215 sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1216 if (verbose) {
1217 std::ostringstream os;
1218 os << *procPrefix;
1219 verbosePrintArray(os, sortedIDs, "sortedIDs", maxNumToPrint);
1220 os << ", ";
1221 verbosePrintArray(os, offset, "offset", maxNumToPrint);
1222 os << endl;
1223 cerr << os.str ();
1224 }
1225
1226 size_t importsIndex = 0;
1227
1228 // we know these conversions are in range, because we loaded this data
1229 //
1230 // Don't use as() for conversions here; we know they are in range.
1231 for (size_t i = 0; i < numRecv; ++i) {
1232 const GO curGID = static_cast<GO> (imports[importsIndex++]);
1233 auto p1 = std::equal_range (sortedIDs.begin(),
1234 sortedIDs.end(), curGID);
1235 if (p1.first != p1.second) {
1236 const size_t j = p1.first - sortedIDs.begin();
1237 nodeIDs[offset[j]] =
1238 static_cast<int> (imports[importsIndex++]);
1239 if (computeLIDs) {
1240 localIDs[offset[j]] =
1241 static_cast<LO> (imports[importsIndex++]);
1242 }
1243 if (nodeIDs[offset[j]] == -1) {
1244 res = IDNotPresent;
1245 }
1246 }
1247 }
1248
1249 TEUCHOS_TEST_FOR_EXCEPTION
1250 (size_t (importsIndex) > size_t (imports.size ()),
1251 std::logic_error, funcPrefix << "On Process " <<
1252 comm->getRank () << ": importsIndex = " << importsIndex
1253 << " > imports.size() = " << imports.size () << ". "
1254 "numRecv: " << numRecv
1255 << ", packetSize: " << packetSize << ", "
1256 "numEntries (# GIDs): " << numEntries
1257 << ", numMissing: " << numMissing
1258 << ": distor.getTotalReceiveLength(): "
1259 << distor.getTotalReceiveLength () << "." << errSuffix);
1260 if (verbose) {
1261 std::ostringstream os;
1262 os << *procPrefix << funcPrefix << "Done!" << endl;
1263 cerr << os.str ();
1264 }
1265 return res;
1266 }
1267
1268
1269 template<class LO, class GO, class NT>
1270 bool
1272 isOneToOne (const Teuchos::Comm<int>& comm) const
1273 {
1274 if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1275 const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1276 int gbl121 = 0;
1277 Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1278 Teuchos::outArg (gbl121));
1279 oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1280 }
1281 return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1282 }
1283 } // namespace Details
1284} // namespace Tpetra
1285
1286//
1287// Explicit instantiation macro
1288//
1289// Must be expanded from within the Tpetra namespace!
1290//
1291#define TPETRA_DIRECTORYIMPL_INSTANT(LO,GO,NODE) \
1292 namespace Details { \
1293 template class Directory< LO , GO , NODE >; \
1294 template class ReplicatedDirectory< LO , GO , NODE >; \
1295 template class ContiguousUniformDirectory< LO, GO, NODE >; \
1296 template class DistributedContiguousDirectory< LO , GO , NODE >; \
1297 template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1298 }
1299
1300#endif // TPETRA_DIRECTORYIMPL_DEF_HPP
Interface for breaking ties in ownership.
Stand-alone utility functions and macros.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Description of Tpetra's behavior.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation of Directory for a contiguous, uniformly distributed Map.
std::string description() const override
A one-line human-readable description of this object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
virtual LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const =0
Actually do the work of getEntries(), with no input validation.
Implementation of Directory for a distributed contiguous Map.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const override
A one-line human-readable description of this object.
Implementation of Directory for a distributed noncontiguous Map.
std::string description() const override
A one-line human-readable description of this object.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
std::string description() const override
A one-line human-readable description of this object.
ReplicatedDirectory()=default
Constructor (that takes no arguments).
Sets up and executes a communication plan for a Tpetra DistObject.
std::enable_if<(Kokkos::is_view< ExpView >::value &&Kokkos::is_view< ImpView >::value)>::type doPostsAndWaits(const ExpView &exports, size_t numPackets, const ImpView &imports)
Execute the (forward) communication plan.
void createFromRecvs(const Teuchos::ArrayView< const Ordinal > &remoteIDs, const Teuchos::ArrayView< const int > &remoteProcIDs, Teuchos::Array< Ordinal > &exportIDs, Teuchos::Array< int > &exportProcIDs)
Set up Distributor using list of process ranks from which to receive.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
std::string description() const
Return a one-line description of this object.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Nonmember function that computes a residual Computes R = B - A * X.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void initialize(int *argc, char ***argv)
Initialize Tpetra.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t global_size_t
Global size_t object.