Tpetra_ComputeGatherMap.hpp
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <numeric>

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if ! defined(TRILINOS_UNUSED_FUNCTION)
#  if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#    define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#  elif defined(__clang__)
#    if __has_attribute(unused)
#      define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#    else
#      define TRILINOS_UNUSED_FUNCTION
#    endif // Clang has 'unused' attribute
#  elif defined(__IBMCPP__)
// IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
//
// http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#    define TRILINOS_UNUSED_FUNCTION
#  else // some other compiler
#    define TRILINOS_UNUSED_FUNCTION
#  endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)
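
// Example (illustrative sketch; exampleHelper is a hypothetical
// file-local function): placing the macro in front of a definition,
//
//   TRILINOS_UNUSED_FUNCTION static int exampleHelper () { return 0; }
//
// lets compilers that support the 'unused' attribute skip the usual
// unused-function warning when the definition goes unreferenced.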


namespace Tpetra {
  namespace Details {

    namespace {
#ifdef HAVE_MPI
      // We're communicating integers of type IntType. Figure
      // out the right MPI_Datatype for IntType. Usually it
      // is int or long, so these are good enough for now.
      template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype () {
        TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "Not implemented for IntType != int, long, or long long");
      }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<int> () { return MPI_INT; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long> () { return MPI_LONG; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long long> () { return MPI_LONG_LONG; }
#endif // HAVE_MPI

      template<class IntType>
      void
      gather (const IntType sendbuf[], const int sendcnt,
              IntType recvbuf[], const int recvcnt,
              const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap MPI_Gather in Teuchos.
        RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gather (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                      sendcnt,
                                      sendtype,
                                      reinterpret_cast<void*> (recvbuf),
                                      recvcnt,
                                      recvtype,
                                      root,
                                      rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnt > sendcnt, std::invalid_argument,
          "gather: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send. "
          "You aim to receive " << recvcnt << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy. recvcnt is the
        // amount to receive, so it's the amount to copy.
        std::copy (sendbuf, sendbuf + recvcnt, recvbuf);
      }
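
      // Example (illustrative sketch): with three processes and
      // sendcnt = recvcnt = 1, where process r puts the single value
      // 10 + r in sendbuf, the call
      //
      //   gather<int> (sendbuf, 1, recvbuf, 1, 0, comm);
      //
      // leaves recvbuf = {10, 11, 12} on process 0. Over a serial
      // communicator the function just copies recvcnt entries from
      // sendbuf to recvbuf.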

      template<class IntType>
      void
      gatherv (const IntType sendbuf[], const int sendcnt,
               IntType recvbuf[], const int recvcnts[], const int displs[],
               const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gatherv in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gatherv (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                       sendcnt,
                                       sendtype,
                                       reinterpret_cast<void*> (recvbuf),
                                       const_cast<int*> (recvcnts),
                                       const_cast<int*> (displs),
                                       recvtype,
                                       root,
                                       rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
          return;
        }
#endif // HAVE_MPI
        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnts[0] > sendcnt,
          std::invalid_argument,
          "gatherv: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send. "
          "You aim to receive " << recvcnts[0] << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy. recvcnts[0] is the
        // amount to receive, so it's the amount to copy. Start
        // writing to recvbuf at the offset displs[0].
        std::copy (sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
      }
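
      // Example (illustrative sketch): with recvcnts = {2, 1, 3} and
      // displs = {0, 2, 3}, process 0's two entries land in
      // recvbuf[0..1], process 1's single entry in recvbuf[2], and
      // process 2's three entries in recvbuf[3..5], all on the root.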
    } // namespace (anonymous)


    // Given an arbitrary Map, compute a Map containing all the GIDs
    // in the same order as in (the one-to-one version of) map, but
    // all owned exclusively by Proc 0.
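    //
    // Example usage (a minimal sketch; rowMap stands for any existing
    // distributed Map, and passing Teuchos::null for the output stream
    // disables the optional debug output):
    //
    //   using map_type = Tpetra::Map<>;
    //   Teuchos::RCP<const map_type> rowMap = ...;
    //   Teuchos::RCP<const map_type> gatherMap =
    //     Tpetra::Details::computeGatherMap (rowMap, Teuchos::null);
    //   // gatherMap now lists every GID of (the one-to-one version of)
    //   // rowMap, in the same order, all owned by Process 0.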
    template<class MapType>
    Teuchos::RCP<const MapType>
    computeGatherMap (Teuchos::RCP<const MapType> map,
                      const Teuchos::RCP<Teuchos::FancyOStream>& err,
                      const bool dbg=false)
    {
      using Teuchos::Array;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::as;
      using Teuchos::Comm;
      using Teuchos::RCP;
      using std::endl;
      typedef typename MapType::local_ordinal_type LO;
      typedef typename MapType::global_ordinal_type GO;
      typedef typename MapType::node_type NT;

      RCP<const Comm<int> > comm = map->getComm ();
      const int numProcs = comm->getSize ();
      const int myRank = comm->getRank ();

      if (! err.is_null ()) {
        err->pushTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap:" << endl;
      }
      if (! err.is_null ()) {
        err->pushTab ();
      }

      RCP<const MapType> oneToOneMap;
      if (map->isContiguous ()) {
        oneToOneMap = map; // contiguous Maps are always 1-to-1
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
        }
        // The input Map might already be one-to-one, but the class
        // doesn't give us a way to test this, other than to create
        // the one-to-one Map.
        oneToOneMap = createOneToOne<LO, GO, NT> (map);
      }

      RCP<const MapType> gatherMap;
      if (numProcs == 1) {
        gatherMap = oneToOneMap;
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
        }
        // Gather each process' count of Map elements to Proc 0,
        // into the recvCounts array. This will tell Proc 0 how
        // many GIDs to expect from each process when calling
        // MPI_Gatherv. Counts and offsets are all int, because
        // that's what MPI uses. Teuchos::as will at least prevent
        // bad casts to int in a debug build.
        //
        // This is not memory scalable: we bring ALL the data to
        // Proc 0, not just the receive counts, so the first
        // MPI_Gather is only the beginning. To make this memory
        // scalable, the right thing to do (after the Export to the
        // one-to-one Map) would be to go round robin through the
        // processes, having each send a chunk of data (with its
        // GIDs, to get the order of rows right) at a time, and
        // overlapping writing to the file (resp. reading from it)
        // with receiving (resp. sending) the next chunk.
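        //
        // One possible shape of that round-robin variant (a sketch of
        // the idea only, not what this function does):
        //
        //   for (int p = 1; p < numProcs; ++p) {
        //     if (myRank == 0)      { /* receive chunk p, then consume it */ }
        //     else if (myRank == p) { /* send my GIDs (and data) to Proc 0 */ }
        //   }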
        const int myEltCount = as<int> (oneToOneMap->getLocalNumElements ());
        Array<int> recvCounts (numProcs);
        const int rootProc = 0;
        gather<int> (&myEltCount, 1, recvCounts.getRawPtr (), 1, rootProc, comm);
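        // Example (illustrative sketch): if three processes own 4, 2,
        // and 5 elements of oneToOneMap respectively, recvCounts on
        // Proc 0 becomes {4, 2, 5}; on the other processes the
        // contents of recvCounts are not used.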

        ArrayView<const GO> myGlobalElts = oneToOneMap->getLocalElementList ();
        const int numMyGlobalElts = as<int> (myGlobalElts.size ());
        // Only Proc 0 needs to receive and store all the GIDs (from
        // all processes).
        ArrayRCP<GO> allGlobalElts;
        if (myRank == 0) {
          allGlobalElts = Teuchos::arcp<GO> (oneToOneMap->getGlobalNumElements ());
          std::fill (allGlobalElts.begin (), allGlobalElts.end (), 0);
        }

        if (dbg) {
          *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
            "displacements" << endl;
        }
        // Displacements for gatherv() in this case (where we are
        // gathering into a contiguous array) are an exclusive prefix
        // sum of recvCounts: the first entry is zero, and entry k is
        // the sum of recvCounts[0 .. k-1].
        Array<int> displs (numProcs, 0);
        std::partial_sum (recvCounts.begin (), recvCounts.end () - 1,
                          displs.begin () + 1);
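        // Example (illustrative sketch): recvCounts = {4, 2, 5} yields
        // displs = {0, 4, 6}; std::partial_sum over the first
        // numProcs - 1 counts writes the running sums {4, 6} into
        // displs[1] and displs[2], while displs[0] stays 0.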
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
        }
        gatherv<GO> (myGlobalElts.getRawPtr (), numMyGlobalElts,
                     allGlobalElts.getRawPtr (), recvCounts.getRawPtr (),
                     displs.getRawPtr (), rootProc, comm);

        if (dbg) {
          *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
        }
        // Create a Map with all the GIDs, in the same order as in
        // the one-to-one Map, but owned by Proc 0.
        ArrayView<const GO> allElts (NULL, 0);
        if (myRank == 0) {
          allElts = allGlobalElts ();
        }
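        // Passing the "invalid" global size below asks the Map
        // constructor to compute the global number of elements itself,
        // as the sum over all processes of the local list lengths.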
        const global_size_t INVALID = Teuchos::OrdinalTraits<global_size_t>::invalid ();
        gatherMap = rcp (new MapType (INVALID, allElts,
                                      oneToOneMap->getIndexBase (),
                                      comm));
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap: done" << endl;
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      return gatherMap;
    }

  } // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp