Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Export_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_EXPORT_DEF_HPP
11#define TPETRA_EXPORT_DEF_HPP
12
13
14#include "Tpetra_Distributor.hpp"
15#include "Tpetra_Map.hpp"
16#include "Tpetra_ImportExportData.hpp"
17#include "Tpetra_Util.hpp"
18#include "Tpetra_Import.hpp"
19#include "Tpetra_Details_DualViewUtil.hpp"
21#include "Teuchos_as.hpp"
22#include "Teuchos_Array.hpp"
23#include "Teuchos_FancyOStream.hpp"
24#include "Teuchos_ParameterList.hpp"
25#include <memory>
26
27namespace Tpetra {
28
// Primary constructor: the other (source, target, ...) constructors in this
// file delegate to it.  Forwards source, target, out, and plist to base_type
// with the label "Export", then builds the transfer data in three steps:
//   1. setupSamePermuteExport classifies the source Map's entries and
//      collects the GIDs to export in exportGIDs;
//   2. setupRemote runs only when the source Map is distributed;
//   3. detectRemoteExportLIDsContiguous is called on the finished data.
// When verbose() is true, a start and a done message tagged with the
// calling process' rank are written to verboseOutputStream().
29 template <class LocalOrdinal, class GlobalOrdinal, class Node>
31 Export (const Teuchos::RCP<const map_type >& source,
32 const Teuchos::RCP<const map_type >& target,
33 const Teuchos::RCP<Teuchos::FancyOStream>& out,
34 const Teuchos::RCP<Teuchos::ParameterList>& plist) :
35 base_type (source, target, out, plist, "Export")
36 {
37 using Teuchos::rcp;
38 using std::endl;
40 ProfilingRegion regionExport ("Tpetra::Export::Export");
41
42 if (this->verbose ()) {
// The message is assembled in a local stream and written with a single
// insertion -- presumably so that output from different processes does
// not interleave mid-line (TODO confirm).
43 std::ostringstream os;
44 const int myRank = source->getComm ()->getRank ();
45 os << myRank << ": Export ctor" << endl;
46 this->verboseOutputStream () << os.str ();
47 }
// exportGIDs is filled by setupSamePermuteExport and then handed to
// setupRemote.
48 Teuchos::Array<GlobalOrdinal> exportGIDs;
49 setupSamePermuteExport (exportGIDs);
50 if (source->isDistributed ()) {
51 setupRemote (exportGIDs);
52 }
53
// Sanity checks: none of the four LID DualViews may still need a sync to
// either the device or the host once setup has finished.
54 TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_device () );
55 TEUCHOS_ASSERT( ! this->TransferData_->permuteFromLIDs_.need_sync_host () );
56 TEUCHOS_ASSERT( ! this->TransferData_->permuteToLIDs_.need_sync_device () );
57 TEUCHOS_ASSERT( ! this->TransferData_->permuteToLIDs_.need_sync_host () );
58 TEUCHOS_ASSERT( ! this->TransferData_->remoteLIDs_.need_sync_device () );
59 TEUCHOS_ASSERT( ! this->TransferData_->remoteLIDs_.need_sync_host () );
60 TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_device () );
61 TEUCHOS_ASSERT( ! this->TransferData_->exportLIDs_.need_sync_host () );
62
63 this->detectRemoteExportLIDsContiguous();
64
65 if (this->verbose ()) {
66 std::ostringstream os;
67 const int myRank = source->getComm ()->getRank ();
68 os << myRank << ": Export ctor: done" << endl;
69 this->verboseOutputStream () << os.str ();
70 }
71 }
72
// Convenience constructor taking only the source and target Maps.
// Delegates to the four-argument constructor, passing Teuchos::null for
// both the output stream and the parameter list.
73 template <class LocalOrdinal, class GlobalOrdinal, class Node>
75 Export (const Teuchos::RCP<const map_type>& source,
76 const Teuchos::RCP<const map_type>& target) :
77 Export (source, target, Teuchos::null, Teuchos::null)
78 {}
79
// Convenience constructor taking the source and target Maps plus an output
// stream.  Delegates to the four-argument constructor, passing
// Teuchos::null for the parameter list.
80 template <class LocalOrdinal, class GlobalOrdinal, class Node>
82 Export (const Teuchos::RCP<const map_type >& source,
83 const Teuchos::RCP<const map_type >& target,
84 const Teuchos::RCP<Teuchos::FancyOStream>& out) :
85 Export (source, target, out, Teuchos::null)
86 {}
87
// Convenience constructor taking the source and target Maps plus a
// parameter list.  Delegates to the four-argument constructor, passing
// Teuchos::null for the output stream.
88 template <class LocalOrdinal, class GlobalOrdinal, class Node>
90 Export (const Teuchos::RCP<const map_type >& source,
91 const Teuchos::RCP<const map_type >& target,
92 const Teuchos::RCP<Teuchos::ParameterList>& plist) :
93 Export (source, target, Teuchos::null, plist)
94 {}
95
96 template <class LocalOrdinal, class GlobalOrdinal, class Node>
101
// NOTE(review): the signature line(s) of this constructor are not visible
// here.  What is visible: the base class is initialized from `importer`
// together with a default-constructed base_type::reverse_tag, and the
// constructor body is empty.
102 template <class LocalOrdinal, class GlobalOrdinal, class Node>
105 base_type (importer, typename base_type::reverse_tag ())
106 {}
107
// Describe this object on the given output stream.
//
// out       [in/out]: stream that receives the description.
// verbLevel [in]: verbosity level, passed through unchanged.
//
// This is a thin wrapper: it supplies the class name "Tpetra::Export" and
// leaves everything else to describeImpl.
108 template <class LocalOrdinal, class GlobalOrdinal, class Node>
109 void
111 describe (Teuchos::FancyOStream& out,
112 const Teuchos::EVerbosityLevel verbLevel) const
113 {
114 // Call the base class' method. It does all the work.
115 this->describeImpl (out, "Tpetra::Export", verbLevel);
116 }
117
// Print this object to a plain std::ostream.
//
// os [in/out]: destination stream.  It is wrapped by reference
//   (Teuchos::rcpFromRef) in a FancyOStream, and describe() is then called
//   on that wrapper at Teuchos::VERB_EXTREME.
118 template <class LocalOrdinal, class GlobalOrdinal, class Node>
120 print (std::ostream& os) const
121 {
122 auto out = Teuchos::getFancyOStream (Teuchos::rcpFromRef (os));
123 // "Print" traditionally meant "everything."
124 this->describe (*out, Teuchos::VERB_EXTREME);
125 }
126
// Classify this process' source Map entries as "same", "permute", or
// "export", and store the result in *TransferData_:
//   - numSameIDs_: length of the leading run of GIDs that the source and
//     target Maps share in the same order;
//   - permuteToLIDs_ / permuteFromLIDs_: target and source LIDs of the
//     remaining source GIDs that are also in the target Map on this process;
//   - exportLIDs_: source LIDs of the remaining source GIDs that are not in
//     the target Map on this process;
//   - exportPIDs_ (only if the source Map is distributed): the process ID
//     that target.getRemoteIndexList reports for each export GID.
//
// exportGIDs [out]: resized and filled with the GIDs that correspond to
//   exportLIDs_, in the same order.  Entries whose process ID comes back as
//   -1 are removed from exportGIDs, exportLIDs_, and exportPIDs_ together,
//   and isLocallyComplete_ is set to false in that case.
127 template <class LocalOrdinal, class GlobalOrdinal, class Node>
128 void
130 setupSamePermuteExport (Teuchos::Array<GlobalOrdinal>& exportGIDs)
131 {
135 using Teuchos::arcp;
136 using Teuchos::Array;
137 using Teuchos::ArrayRCP;
138 using Teuchos::ArrayView;
139 using Teuchos::as;
140 using Teuchos::null;
141 using std::endl;
142 using LO = LocalOrdinal;
143 using GO = GlobalOrdinal;
144 using size_type = typename ArrayView<const GO>::size_type;
145 const char tfecfFuncName[] = "setupSamePermuteExport: ";
146 ProfilingRegion regionExport ("Tpetra::Export::setupSamePermuteExport");
147
// prefix is only allocated when verbose() is true; every later use of
// *prefix is guarded by the same verbose() check.
148 std::unique_ptr<std::string> prefix;
149 if (this->verbose ()) {
150 auto srcMap = this->getSourceMap ();
151 auto comm = srcMap.is_null () ? Teuchos::null : srcMap->getComm ();
152 const int myRank = comm.is_null () ? -1 : comm->getRank ();
153
154 std::ostringstream os;
155 os << "Proc " << myRank << ": Tpetra::Export::setupSamePermuteExport: ";
156 prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
157
158 std::ostringstream os2;
159 os2 << *prefix << "Start" << std::endl;
160 this->verboseOutputStream () << os2.str ();
161 }
162
163 const map_type& source = * (this->getSourceMap ());
164 const map_type& target = * (this->getTargetMap ());
165 ArrayView<const GO> sourceGIDs = source.getLocalElementList ();
166 ArrayView<const GO> targetGIDs = target.getLocalElementList ();
167
// Debug builds index through the ArrayViews; other builds index through
// raw pointers.  NOTE(review): presumably this is to get bounds checking
// in debug builds only -- confirm against Teuchos::ArrayView.
168#ifdef HAVE_TPETRA_DEBUG
169 ArrayView<const GO> rawSrcGids = sourceGIDs;
170 ArrayView<const GO> rawTgtGids = targetGIDs;
171#else
172 const GO* const rawSrcGids = sourceGIDs.getRawPtr ();
173 const GO* const rawTgtGids = targetGIDs.getRawPtr ();
174#endif // HAVE_TPETRA_DEBUG
175 const size_type numSrcGids = sourceGIDs.size ();
176 const size_type numTgtGids = targetGIDs.size ();
177 const size_type numGids = std::min (numSrcGids, numTgtGids);
178
179 // Compute numSameIDs_: the number of initial GIDs that are the
180 // same (and occur in the same order) in both Maps. The point of
181 // numSameIDs_ is for the common case of an Export where all the
182 // overlapping GIDs are at the end of the source Map, but
183 // otherwise the source and target Maps are the same. This allows
184 // a fast contiguous copy for the initial "same IDs."
185 size_type numSameGids = 0;
186 for ( ; numSameGids < numGids &&
187 rawSrcGids[numSameGids] == rawTgtGids[numSameGids];
188 ++numSameGids)
189 {} // third clause of 'for' does everything
190 this->TransferData_->numSameIDs_ = numSameGids;
191
192 if (this->verbose ()) {
193 std::ostringstream os;
194 os << *prefix << "numIDs: " << numGids
195 << ", numSameIDs: " << numSameGids << endl;
196 this->verboseOutputStream () << os.str ();
197 }
198
199 // Compute permuteToLIDs_, permuteFromLIDs_, exportGIDs, and
200 // exportLIDs_. The first two arrays are IDs to be permuted, and
201 // the latter two arrays are IDs to be sent out ("exported"), called
202 // "export" IDs.
203 //
204 // IDs to permute are in both the source and target Maps, which
205 // means we don't have to send or receive them, but we do have to
206 // rearrange (permute) them in general. IDs to send are in the
207 // source Map, but not in the target Map.
208
209 // Iterate over the source Map's LIDs, since we only need to do
210 // GID -> LID lookups for the target Map.
211 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
212 const LO numSrcLids = static_cast<LO> (numSrcGids);
213 LO numPermutes = 0;
214 LO numExports = 0;
215
// Pass 1: only count permutes and exports, so that the arrays below can
// be allocated at their exact sizes.
216 for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
217 const GO curSrcGid = rawSrcGids[srcLid];
218 // getLocalElement() returns LINVALID if the GID isn't in the
219 // target Map. This saves us a lookup (which
220 // isNodeGlobalElement() would do).
221 const LO tgtLid = target.getLocalElement (curSrcGid);
222 if (tgtLid != LINVALID) { // if target.isNodeGlobalElement (curSrcGid)
223 ++numPermutes;
224 }
225 else {
226 ++numExports;
227 }
228 }
229 if (this->verbose ()) {
230 std::ostringstream os;
231 os << *prefix << "numPermutes: " << numPermutes
232 << ", numExports: " << numExports << endl;
233 this->verboseOutputStream () << os.str ();
234 }
235 TEUCHOS_ASSERT( numPermutes + numExports ==
236 numSrcLids - numSameGids );
237
// Host views allocated through view_alloc_no_init; every entry is written
// by the second pass below.
238 typename decltype (this->TransferData_->permuteToLIDs_)::t_host
239 permuteToLIDs (view_alloc_no_init ("permuteToLIDs"), numPermutes);
240 typename decltype (this->TransferData_->permuteToLIDs_)::t_host
241 permuteFromLIDs (view_alloc_no_init ("permuteFromLIDs"), numPermutes);
242 typename decltype (this->TransferData_->permuteToLIDs_)::t_host
243 exportLIDs (view_alloc_no_init ("exportLIDs"), numExports);
244
245 // FIXME (mfh 03 Feb 2019) Replace with std::unique_ptr of array,
246 // to avoid superfluous initialization on resize.
247 exportGIDs.resize (numExports);
248
249 {
// Pass 2: repeat the same classification, this time filling the arrays.
// The counters must end up equal to the counts from pass 1.
250 LO numPermutes2 = 0;
251 LO numExports2 = 0;
252 for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
253 const GO curSrcGid = rawSrcGids[srcLid];
254 const LO tgtLid = target.getLocalElement (curSrcGid);
255 if (tgtLid != LINVALID) {
256 permuteToLIDs[numPermutes2] = tgtLid;
257 permuteFromLIDs[numPermutes2] = srcLid;
258 ++numPermutes2;
259 }
260 else {
261 exportGIDs[numExports2] = curSrcGid;
262 exportLIDs[numExports2] = srcLid;
263 ++numExports2;
264 }
265 }
266 TEUCHOS_ASSERT( numPermutes == numPermutes2 );
267 TEUCHOS_ASSERT( numExports == numExports2 );
268 TEUCHOS_ASSERT( size_t (numExports) == size_t (exportGIDs.size ()) );
269 }
270
271 // Defer making this->TransferData_->exportLIDs_ until after
272 // getRemoteIndexList, since we might need to shrink it then.
273
274 // exportLIDs is the list of this process' LIDs that it has to
275 // send out. Since this is an Export, and therefore the target
276 // Map is nonoverlapping, we know that each export LID only needs
277 // to be sent to one process. However, the source Map may be
278 // overlapping, so multiple processes might send to the same LID
279 // on a receiving process.
280
281 if (numExports != 0 && ! source.isDistributed ()) {
282 // This Export has export LIDs, meaning that the source Map has
283 // entries on this process that are not in the target Map on
284 // this process. However, the source Map is not distributed
285 // globally. This implies that this Export is not locally
286 // complete on this process.
287 this->TransferData_->isLocallyComplete_ = false;
288 if (this->verbose ()) {
289 std::ostringstream os;
290 os << *prefix << "Export is not locally complete" << endl;
291 this->verboseOutputStream () << os.str ();
292 }
293 // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
294 // correct behavior, depending on the circumstances.
296 (true, std::runtime_error, "::setupSamePermuteExport(): Source has "
297 "export LIDs but Source is not distributed globally. Exporting to "
298 "a submap of the target map.");
299 }
300
301 // Compute exportPIDs_ ("outgoing" process IDs).
302 //
303 // For each GID in exportGIDs (GIDs to which this process must
304 // send), find its corresponding owning process (a.k.a. "image")
305 // ID in the target Map. Store these process IDs in
306 // exportPIDs_. These are the process IDs to which the Export
307 // needs to send data.
308 //
309 // We only need to do this if the source Map is distributed;
310 // otherwise, the Export doesn't have to perform any
311 // communication.
312 if (source.isDistributed ()) {
313 if (this->verbose ()) {
314 std::ostringstream os;
315 os << *prefix << "Source Map is distributed; "
316 "call targetMap.getRemoteiNdexList" << endl;
317 this->verboseOutputStream () << os.str ();
318 }
319 this->TransferData_->exportPIDs_.resize(exportGIDs.size ());
320 // This call will assign any GID in the target Map with no
321 // corresponding process ID a fake process ID of -1. We'll use
322 // this below to remove exports for processes that don't exist.
323 const LookupStatus lookup =
324 target.getRemoteIndexList (exportGIDs(),
325 this->TransferData_->exportPIDs_ ());
326 // mfh 12 Sep 2016: I disagree that this is "abuse"; it may be
327 // correct behavior, depending on the circumstances.
328 TPETRA_ABUSE_WARNING( lookup == IDNotPresent, std::runtime_error,
329 "::setupSamePermuteExport(): The source Map has GIDs not found "
330 "in the target Map.");
331
332 // Get rid of process IDs not in the target Map. This prevents
333 // exporting to GIDs which don't belong to any process in the
334 // target Map.
335 if (lookup == IDNotPresent) {
336 // There is at least one GID owned by the calling process in
337 // the source Map, which is not owned by any process in the
338 // target Map.
339 this->TransferData_->isLocallyComplete_ = false;
340
341 Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
342
343 const size_type totalNumExports = exportPIDs.size ();
344 const size_type numInvalidExports =
345 std::count_if (exportPIDs.begin (), exportPIDs.end (),
346 [] (const int procId) { return procId == -1; });
347 if (this->verbose ()) {
348 std::ostringstream os;
349 os << *prefix << "totalNumExports: " << totalNumExports
350 << ", numInvalidExports: " << numInvalidExports << endl;
351 this->verboseOutputStream () << os.str ();
352 }
// IDNotPresent with no -1 entry would be an internal inconsistency, hence
// std::logic_error rather than a user-facing error.
353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
354 (numInvalidExports == 0, std::logic_error,
355 "targetMap.getRemoteIndexList returned IDNotPresent, but no export "
356 "PIDs are -1. Please report this bug to the Tpetra developers.");
357
358 // We know that at least one export ID is invalid, that is,
359 // not in any process on the target Map. If all export IDs
360 // are invalid, we can delete all exports. Otherwise, keep
361 // the valid exports and discard the rest. This is legit
362 // Petra Object Model behavior, but it's a less common case.
363
364 if (numInvalidExports == totalNumExports) {
365 exportGIDs.resize (0);
// Replace the host view with a default-constructed (empty) one.
366 exportLIDs = decltype (exportLIDs) ();
367 exportPIDs.resize (0);
368 }
369 else {
// Compact the three parallel arrays in place, keeping only entries whose
// process ID is not -1.  numValidExports never exceeds e, so an entry is
// never overwritten before it has been read.
370 size_type numValidExports = 0;
371 for (size_type e = 0; e < totalNumExports; ++e) {
372 if (this->TransferData_->exportPIDs_[e] != -1) {
373 exportGIDs[numValidExports] = exportGIDs[e];
374 exportLIDs[numValidExports] = exportLIDs[e];
375 exportPIDs[numValidExports] = exportPIDs[e];
376 ++numValidExports;
377 }
378 }
379 exportGIDs.resize (numValidExports);
380 Kokkos::resize (exportLIDs, numValidExports);
381 exportPIDs.resize (numValidExports);
382 }
383 }
384 }
385
386 // FIXME (mfh 03 Feb 2019) These three DualViews could share a
387 // single device allocation, in order to avoid high cudaMalloc
388 // cost and device memory fragmentation.
389 makeDualViewFromOwningHostView (this->TransferData_->permuteToLIDs_, permuteToLIDs);
390 makeDualViewFromOwningHostView (this->TransferData_->permuteFromLIDs_, permuteFromLIDs);
391 makeDualViewFromOwningHostView (this->TransferData_->exportLIDs_, exportLIDs);
392
393 if (this->verbose ()) {
394 std::ostringstream os;
395 os << *prefix << "Done!" << std::endl;
396 this->verboseOutputStream () << os.str ();
397 }
398 }
399
// Build the communication plan and the list of incoming ("remote") LIDs.
// The constructor in this file calls this only when the source Map is
// distributed.
//
// Steps: sort the export arrays by destination process, create the
// Distributor plan from exportPIDs_, use that plan to send each export GID
// to its destination, and convert the GIDs received this way into
// target-Map LIDs, which are stored in remoteLIDs_.
//
// exportGIDs [in/out]: GIDs to export, parallel to exportPIDs_ and
//   exportLIDs_ (the latter two are asserted below to have equal length).
//   Reordered in place by sort3.
400 template <class LocalOrdinal, class GlobalOrdinal, class Node>
401 void
403 setupRemote (Teuchos::Array<GlobalOrdinal>& exportGIDs)
404 {
407 using Teuchos::Array;
408 using std::endl;
409 using LO = LocalOrdinal;
410 using GO = GlobalOrdinal;
411
// prefix is only allocated when verbose() is true; every later use of
// *prefix is guarded by the same verbose() check.
412 std::unique_ptr<std::string> prefix;
413 if (this->verbose ()) {
414 auto srcMap = this->getSourceMap ();
415 auto comm = srcMap.is_null () ? Teuchos::null : srcMap->getComm ();
416 const int myRank = comm.is_null () ? -1 : comm->getRank ();
417
418 std::ostringstream os;
419 os << "Proc " << myRank << ": Tpetra::Export::setupRemote: ";
420 prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
421
422 std::ostringstream os2;
423 os2 << *prefix << "Start" << std::endl;
424 this->verboseOutputStream () << os2.str ();
425 }
426
427 TEUCHOS_ASSERT( ! this->getTargetMap ().is_null () );
428 const map_type& tgtMap = * (this->getTargetMap ());
429
430 // Sort exportPIDs_ in ascending order, and apply the same
431 // permutation to exportGIDs and exportLIDs_. This ensures that
432 // exportPIDs_[i], exportGIDs[i], and exportLIDs_[i] all
433 // refer to the same thing.
434 {
435 TEUCHOS_ASSERT( size_t (this->TransferData_->exportLIDs_.extent (0)) ==
436 size_t (this->TransferData_->exportPIDs_.size ()) );
// The host copy is marked modified before sort3 writes through it, and
// the DualView is synced to the device afterwards.
437 this->TransferData_->exportLIDs_.modify_host ();
438 auto exportLIDs = this->TransferData_->exportLIDs_.view_host ();
439 sort3 (this->TransferData_->exportPIDs_.begin (),
440 this->TransferData_->exportPIDs_.end (),
441 exportGIDs.getRawPtr (),
442 exportLIDs.data ());
443 this->TransferData_->exportLIDs_.sync_device ();
444 // FIXME (mfh 03 Feb 2019) We actually end up sync'ing
445 // exportLIDs_ to device twice, once in setupSamePermuteExport,
446 // and once here. We could avoid the first sync.
447 }
448
449 if (this->verbose ()) {
450 std::ostringstream os;
451 os << *prefix << "Call createFromSends" << endl;
452 this->verboseOutputStream () << os.str ();
453 }
454
455 // Construct the list of entries that calling image needs to send
456 // as a result of everyone asking for what it needs to receive.
457 //
458 // mfh 05 Jan 2012: I understand the above comment as follows:
459 // Construct the communication plan from the list of image IDs to
460 // which we need to send.
461 Teuchos::Array<int>& exportPIDs = this->TransferData_->exportPIDs_;
462 Distributor& distributor = this->TransferData_->distributor_;
463 const size_t numRemoteIDs = distributor.createFromSends (exportPIDs ());
464
465 if (this->verbose ()) {
466 std::ostringstream os;
467 os << *prefix << "numRemoteIDs: " << numRemoteIDs
468 << "; call doPostsAndWaits" << endl;
469 this->verboseOutputStream () << os.str ();
470 }
471
472 // Use the communication plan with ExportGIDs to find out who is
473 // sending to us and get the proper ordering of GIDs for incoming
474 // remote entries (these will be converted to LIDs when done).
475
// exportGIDsConst is constructed from exportGIDs' data pointer and size;
// remoteGIDs is a newly allocated view with numRemoteIDs entries.  One GID
// is exchanged per export entry (the "1" argument).
476 Kokkos::View<const GO*, Kokkos::HostSpace> exportGIDsConst(exportGIDs.data(), exportGIDs.size());
477 Kokkos::View<GO*, Kokkos::HostSpace> remoteGIDs("remoteGIDs", numRemoteIDs);
478 distributor.doPostsAndWaits(exportGIDsConst, 1, remoteGIDs);
479
480 // Remote (incoming) IDs come in as GIDs; convert to LIDs. LIDs
481 // tell this process where to store the incoming remote data.
482 using host_remote_lids_type =
483 typename decltype (this->TransferData_->remoteLIDs_)::t_host;
484 host_remote_lids_type remoteLIDs
485 (view_alloc_no_init ("remoteLIDs"), numRemoteIDs);
486
// Every entry of the uninitialized remoteLIDs view is written here.
487 for (LO j = 0; j < LO (numRemoteIDs); ++j) {
488 remoteLIDs[j] = tgtMap.getLocalElement (remoteGIDs[j]);
489 }
490 makeDualViewFromOwningHostView (this->TransferData_->remoteLIDs_, remoteLIDs);
491
492 if (this->verbose ()) {
493 std::ostringstream os;
494 os << *prefix << "Done!" << endl;
495 this->verboseOutputStream () << os.str ();
496 }
497 }
498
499} // namespace Tpetra
500
501// Explicit instantiation macro.
// Expands to an explicit instantiation of the Export class template for
// the given (LO, GO, NODE) combination.
502// Only invoke this when in the Tpetra namespace.
503// Most users do not need to use this.
504//
505// LO: The local ordinal type.
506// GO: The global ordinal type.
507// NODE: The Kokkos Node type.
508#define TPETRA_EXPORT_INSTANT(LO, GO, NODE) \
509 template class Export< LO , GO , NODE >;
510
511#endif // TPETRA_EXPORT_DEF_HPP
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Stand-alone utility functions and macros.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
Teuchos::RCP< ImportExportData< LocalOrdinal, GlobalOrdinal, Node > > TransferData_
void describeImpl(Teuchos::FancyOStream &out, const std::string &className, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Sets up and executes a communication plan for a Tpetra DistObject.
size_t createFromSends(const Teuchos::ArrayView< const int > &exportProcIDs)
Set up Distributor using list of process ranks to which this process will send.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
virtual void print(std::ostream &os) const
Print the Export's data to the given output stream.
Export(const Teuchos::RCP< const map_type > &source, const Teuchos::RCP< const map_type > &target)
Construct a Export object from the source and target Map.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
auto view_alloc_no_init(const std::string &label) -> decltype(Kokkos::view_alloc(label, Kokkos::WithoutInitializing))
Use in place of the string label as the first argument of Kokkos::View's constructor,...
void makeDualViewFromOwningHostView(Kokkos::DualView< ElementType *, DeviceType > &dv, const typename Kokkos::DualView< ElementType *, DeviceType >::t_host &hostView)
Initialize dv such that its host View is hostView.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3, const bool stableSort=false)
Sort the first array, and apply the same permutation to the second and third arrays.