MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AMGXOperator_decl.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AMGXOPERATOR_DECL_HPP
11#define MUELU_AMGXOPERATOR_DECL_HPP
12
13#if defined(HAVE_MUELU_AMGX)
14#include <Teuchos_ParameterList.hpp>
15
16#include <Tpetra_Operator.hpp>
17#include <Tpetra_CrsMatrix.hpp>
18#include <Tpetra_MultiVector.hpp>
19#include <Tpetra_Distributor.hpp>
20#include <Tpetra_HashTable.hpp>
21#include <Tpetra_Import.hpp>
22#include <Tpetra_Import_Util.hpp>
23
24#include "MueLu_Exceptions.hpp"
25#include "MueLu_TimeMonitor.hpp"
26#include "MueLu_TpetraOperator.hpp"
28
29#include <cuda_runtime.h>
30#include <amgx_c.h>
31
32namespace MueLu {
33
40template <class Scalar,
41 class LocalOrdinal,
42 class GlobalOrdinal,
43 class Node>
44class AMGXOperator : public TpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node>, public BaseClass {
45 private:
46 typedef Scalar SC;
49 typedef Node NO;
50
51 typedef Tpetra::Map<LO, GO, NO> Map;
52 typedef Tpetra::MultiVector<SC, LO, GO, NO> MultiVector;
53
54 public:
56
57
59 AMGXOperator(const Teuchos::RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> >& InA, Teuchos::ParameterList& paramListIn) {}
60
62 virtual ~AMGXOperator() {}
63
65
67 Teuchos::RCP<const Map> getDomainMap() const {
68 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
69 }
70
72 Teuchos::RCP<const Map> getRangeMap() const {
73 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
74 }
75
77
81 void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS,
82 Scalar alpha = Teuchos::ScalarTraits<Scalar>::one(), Scalar beta = Teuchos::ScalarTraits<Scalar>::zero()) const {
83 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
84 }
85
87 bool hasTransposeApply() const {
88 throw Exceptions::RuntimeError("Cannot use AMGXOperator with scalar != double and/or global ordinal != int \n");
89 }
90
91 RCP<MueLu::Hierarchy<SC, LO, GO, NO> > GetHierarchy() const {
92 throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
93 }
94
95 private:
96};
97
104template <class Node>
105class AMGXOperator<double, int, int, Node> : public TpetraOperator<double, int, int, Node> {
106 private:
107 typedef double SC;
108 typedef int LO;
109 typedef int GO;
110 typedef Node NO;
111
112 typedef Tpetra::Map<LO, GO, NO> Map;
113 typedef Tpetra::MultiVector<SC, LO, GO, NO> MultiVector;
114
115 void printMaps(Teuchos::RCP<const Teuchos::Comm<int> >& comm, const std::vector<std::vector<int> >& vec, const std::vector<int>& perm,
116 const int* nbrs, const Map& map, const std::string& label) {
117 for (int p = 0; p < comm->getSize(); p++) {
118 if (comm->getRank() == p) {
119 std::cout << "========\n"
120 << label << ", lid (gid), PID " << p << "\n========" << std::endl;
121
122 for (size_t i = 0; i < vec.size(); ++i) {
123 std::cout << " neighbor " << nbrs[i] << " :";
124 for (size_t j = 0; j < vec[i].size(); ++j)
125 std::cout << " " << vec[i][j] << " (" << map.getGlobalElement(perm[vec[i][j]]) << ")";
126 std::cout << std::endl;
127 }
128 std::cout << std::endl;
129 } else {
130 sleep(1);
131 }
132 comm->barrier();
133 }
134 }
135
136 public:
138
139 AMGXOperator(const Teuchos::RCP<Tpetra::CrsMatrix<SC, LO, GO, NO> >& inA, Teuchos::ParameterList& paramListIn) {
140 RCP<const Teuchos::Comm<int> > comm = inA->getRowMap()->getComm();
141 int numProcs = comm->getSize();
142 int myRank = comm->getRank();
143
144 RCP<Teuchos::Time> amgxTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: initialize");
145 amgxTimer->start();
146 // Initialize
147 // AMGX_SAFE_CALL(AMGX_initialize());
148 // AMGX_SAFE_CALL(AMGX_initialize_plugins());
149
150 /*system*/
151 // AMGX_SAFE_CALL(AMGX_register_print_callback(&print_callback));
152 AMGX_SAFE_CALL(AMGX_install_signal_handler());
153 Teuchos::ParameterList configs = paramListIn.sublist("amgx:params", true);
154 if (configs.isParameter("json file")) {
155 AMGX_SAFE_CALL(AMGX_config_create_from_file(&Config_, (const char*)&configs.get<std::string>("json file")[0]));
156 } else {
157 std::ostringstream oss;
158 oss << "";
159 ParameterList::ConstIterator itr;
160 for (itr = configs.begin(); itr != configs.end(); ++itr) {
161 const std::string& name = configs.name(itr);
162 const ParameterEntry& entry = configs.entry(itr);
163 oss << name << "=" << filterValueToString(entry) << ", ";
164 }
165 oss << "\0";
166 std::string configString = oss.str();
167 if (configString == "") {
168 // print msg that using defaults
169 // GetOStream(Warnings0) << "Warning: No configuration parameters specified, using default AMGX configuration parameters. \n";
170 }
171 AMGX_SAFE_CALL(AMGX_config_create(&Config_, configString.c_str()));
172 }
173
174 // TODO: we probably need to add "exception_handling=1" to the parameter list
175 // to switch on internal error handling (with no need for AMGX_SAFE_CALL)
176
177 // AMGX_SAFE_CALL(AMGX_config_add_parameters(&Config_, "exception_handling=1"))
178
179#define NEW_COMM
180#ifdef NEW_COMM
181 // NOTE: MPI communicator used in AMGX_resources_create must exist in the scope of AMGX_matrix_comm_from_maps_one_ring
182 // FIXME: fix for serial comm
183 RCP<const Teuchos::MpiComm<int> > tmpic = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm->duplicate());
184 TEUCHOS_TEST_FOR_EXCEPTION(tmpic.is_null(), Exceptions::RuntimeError, "Communicator is not MpiComm");
185
186 RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
187 MPI_Comm mpiComm = *rawMpiComm;
188#endif
189
190 // Construct AMGX resources
191 if (numProcs == 1) {
192 AMGX_resources_create_simple(&Resources_, Config_);
193
194 } else {
195 int numGPUDevices;
196 cudaGetDeviceCount(&numGPUDevices);
197 int device[] = {(comm->getRank() % numGPUDevices)};
198
199 AMGX_config_add_parameters(&Config_, "communicator=MPI");
200#ifdef NEW_COMM
201 AMGX_resources_create(&Resources_, Config_, &mpiComm, 1 /* number of GPU devices utilized by this rank */, device);
202#else
203 AMGX_resources_create(&Resources_, Config_, MPI_COMM_WORLD, 1 /* number of GPU devices utilized by this rank */, device);
204#endif
205 }
206
207 AMGX_Mode mode = AMGX_mode_dDDI;
208 AMGX_solver_create(&Solver_, Resources_, mode, Config_);
209 AMGX_matrix_create(&A_, Resources_, mode);
210 AMGX_vector_create(&X_, Resources_, mode);
211 AMGX_vector_create(&Y_, Resources_, mode);
212
213 amgxTimer->stop();
214 amgxTimer->incrementNumCalls();
215
216 std::vector<int> amgx2muelu;
217
218 // Construct AMGX communication pattern
219 if (numProcs > 1) {
220 RCP<const Tpetra::Import<LO, GO, NO> > importer = inA->getCrsGraph()->getImporter();
221
222 TEUCHOS_TEST_FOR_EXCEPTION(importer.is_null(), MueLu::Exceptions::RuntimeError, "The matrix A has no Import object.");
223
224 Tpetra::Distributor distributor = importer->getDistributor();
225
226 Array<int> sendRanks = distributor.getProcsTo();
227 Array<int> recvRanks = distributor.getProcsFrom();
228
229 std::sort(sendRanks.begin(), sendRanks.end());
230 std::sort(recvRanks.begin(), recvRanks.end());
231
232 bool match = true;
233 if (sendRanks.size() != recvRanks.size()) {
234 match = false;
235 } else {
236 for (int i = 0; i < sendRanks.size(); i++) {
237 if (recvRanks[i] != sendRanks[i])
238 match = false;
239 break;
240 }
241 }
242 TEUCHOS_TEST_FOR_EXCEPTION(!match, MueLu::Exceptions::RuntimeError,
243 "AMGX requires that the processors that we send to and receive from are the same. "
244 "This is not the case: we send to {"
245 << sendRanks << "} and receive from {" << recvRanks << "}");
246
247 int num_neighbors = sendRanks.size(); // does not include the calling process
248 const int* neighbors = &sendRanks[0];
249
250 // Later on, we'll have to organize the send and recv data by PIDs,
251 // i.e, a vector V of vectors, where V[i] is PID i's vector of data.
252 // Hence we need to be able to quickly look up an array index
253 // associated with each PID.
254 Tpetra::Details::HashTable<int, int> hashTable(3 * num_neighbors);
255 for (int i = 0; i < num_neighbors; i++)
256 hashTable.add(neighbors[i], i);
257
258 // Get some information out
259 ArrayView<const int> exportLIDs = importer->getExportLIDs();
260 ArrayView<const int> exportPIDs = importer->getExportPIDs();
261 Array<int> importPIDs;
262 Tpetra::Import_Util::getPids(*importer, importPIDs, true /* make local -1 */);
263
264 // Construct the reordering for AMGX as in AMGX_matrix_upload_all documentation
265 RCP<const Map> rowMap = inA->getRowMap();
266 RCP<const Map> colMap = inA->getColMap();
267
268 int N = rowMap->getLocalNumElements(), Nc = colMap->getLocalNumElements();
269 muelu2amgx_.resize(Nc, -1);
270
271 int numUniqExports = 0;
272 for (int i = 0; i < exportLIDs.size(); i++)
273 if (muelu2amgx_[exportLIDs[i]] == -1) {
274 numUniqExports++;
275 muelu2amgx_[exportLIDs[i]] = -2;
276 }
277
278 int localOffset = 0, exportOffset = N - numUniqExports;
279 // Go through exported LIDs and put them at the end of LIDs
280 for (int i = 0; i < exportLIDs.size(); i++)
281 if (muelu2amgx_[exportLIDs[i]] < 0) // exportLIDs are not unique
282 muelu2amgx_[exportLIDs[i]] = exportOffset++;
283 // Go through all non-export LIDs, and put them at the beginning of LIDs
284 for (int i = 0; i < N; i++)
285 if (muelu2amgx_[i] == -1)
286 muelu2amgx_[i] = localOffset++;
287 // Go through the tail (imported LIDs), and order those by neighbors
288 int importOffset = N;
289 for (int k = 0; k < num_neighbors; k++)
290 for (int i = 0; i < importPIDs.size(); i++)
291 if (importPIDs[i] != -1 && hashTable.get(importPIDs[i]) == k)
292 muelu2amgx_[i] = importOffset++;
293
294 amgx2muelu.resize(muelu2amgx_.size());
295 for (int i = 0; i < (int)muelu2amgx_.size(); i++)
296 amgx2muelu[muelu2amgx_[i]] = i;
297
298 // Construct send arrays
299 std::vector<std::vector<int> > sendDatas(num_neighbors);
300 std::vector<int> send_sizes(num_neighbors, 0);
301 for (int i = 0; i < exportPIDs.size(); i++) {
302 int index = hashTable.get(exportPIDs[i]);
303 sendDatas[index].push_back(muelu2amgx_[exportLIDs[i]]);
304 send_sizes[index]++;
305 }
306 // FIXME: sendDatas must be sorted (based on GIDs)
307
308 std::vector<const int*> send_maps(num_neighbors);
309 for (int i = 0; i < num_neighbors; i++)
310 send_maps[i] = &(sendDatas[i][0]);
311
312 // Debugging
313 // printMaps(comm, sendDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "send_map_vector");
314
315 // Construct recv arrays
316 std::vector<std::vector<int> > recvDatas(num_neighbors);
317 std::vector<int> recv_sizes(num_neighbors, 0);
318 for (int i = 0; i < importPIDs.size(); i++)
319 if (importPIDs[i] != -1) {
320 int index = hashTable.get(importPIDs[i]);
321 recvDatas[index].push_back(muelu2amgx_[i]);
322 recv_sizes[index]++;
323 }
324 // FIXME: recvDatas must be sorted (based on GIDs)
325
326 std::vector<const int*> recv_maps(num_neighbors);
327 for (int i = 0; i < num_neighbors; i++)
328 recv_maps[i] = &(recvDatas[i][0]);
329
330 // Debugging
331 // printMaps(comm, recvDatas, amgx2muelu, neighbors, *importer->getTargetMap(), "recv_map_vector");
332
333 AMGX_SAFE_CALL(AMGX_matrix_comm_from_maps_one_ring(A_, 1, num_neighbors, neighbors, &send_sizes[0], &send_maps[0], &recv_sizes[0], &recv_maps[0]));
334
335 AMGX_vector_bind(X_, A_);
336 AMGX_vector_bind(Y_, A_);
337 }
338
339 RCP<Teuchos::Time> matrixTransformTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transform matrix");
340 matrixTransformTimer->start();
341
342 ArrayRCP<const size_t> ia_s;
343 ArrayRCP<const int> ja;
344 ArrayRCP<const double> a;
345 inA->getAllValues(ia_s, ja, a);
346
347 ArrayRCP<int> ia(ia_s.size());
348 for (int i = 0; i < ia.size(); i++)
349 ia[i] = Teuchos::as<int>(ia_s[i]);
350
351 N_ = inA->getLocalNumRows();
352 int nnz = inA->getLocalNumEntries();
353
354 matrixTransformTimer->stop();
355 matrixTransformTimer->incrementNumCalls();
356
357 // Upload matrix
358 // TODO Do we need to pin memory here through AMGX_pin_memory?
359 RCP<Teuchos::Time> matrixTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer matrix CPU->GPU");
360 matrixTimer->start();
361 if (numProcs == 1) {
362 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia[0], &ja[0], &a[0], NULL);
363
364 } else {
365 // Transform the matrix
366 std::vector<int> ia_new(ia.size());
367 std::vector<int> ja_new(ja.size());
368 std::vector<double> a_new(a.size());
369
370 ia_new[0] = 0;
371 for (int i = 0; i < N_; i++) {
372 int oldRow = amgx2muelu[i];
373
374 ia_new[i + 1] = ia_new[i] + (ia[oldRow + 1] - ia[oldRow]);
375
376 for (int j = ia[oldRow]; j < ia[oldRow + 1]; j++) {
377 int offset = j - ia[oldRow];
378 ja_new[ia_new[i] + offset] = muelu2amgx_[ja[j]];
379 a_new[ia_new[i] + offset] = a[j];
380 }
381 // Do bubble sort on two arrays
382 // NOTE: There are multiple possible optimizations here (even of bubble sort)
383 bool swapped;
384 do {
385 swapped = false;
386
387 for (int j = ia_new[i]; j < ia_new[i + 1] - 1; j++)
388 if (ja_new[j] > ja_new[j + 1]) {
389 std::swap(ja_new[j], ja_new[j + 1]);
390 std::swap(a_new[j], a_new[j + 1]);
391 swapped = true;
392 }
393 } while (swapped == true);
394 }
395
396 AMGX_matrix_upload_all(A_, N_, nnz, 1, 1, &ia_new[0], &ja_new[0], &a_new[0], NULL);
397 }
398 matrixTimer->stop();
399 matrixTimer->incrementNumCalls();
400
401 domainMap_ = inA->getDomainMap();
402 rangeMap_ = inA->getRangeMap();
403
404 RCP<Teuchos::Time> realSetupTimer = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: setup (total)");
405 realSetupTimer->start();
406 AMGX_solver_setup(Solver_, A_);
407 realSetupTimer->stop();
408 realSetupTimer->incrementNumCalls();
409
410 vectorTimer1_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vectors CPU->GPU");
411 vectorTimer2_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: transfer vector GPU->CPU");
412 solverTimer_ = Teuchos::TimeMonitor::getNewTimer("MueLu: AMGX: Solve (total)");
413 }
414
416 virtual ~AMGXOperator() {
417 // Comment this out if you need rebuild to work. This causes AMGX_solver_destroy memory issues.
418 AMGX_SAFE_CALL(AMGX_solver_destroy(Solver_));
419 AMGX_SAFE_CALL(AMGX_vector_destroy(X_));
420 AMGX_SAFE_CALL(AMGX_vector_destroy(Y_));
421 AMGX_SAFE_CALL(AMGX_matrix_destroy(A_));
422 AMGX_SAFE_CALL(AMGX_resources_destroy(Resources_));
423 AMGX_SAFE_CALL(AMGX_config_destroy(Config_));
424 }
425
427 Teuchos::RCP<const Map> getDomainMap() const;
428
430 Teuchos::RCP<const Map> getRangeMap() const;
431
433
437 void apply(const MultiVector& X, MultiVector& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS,
438 SC alpha = Teuchos::ScalarTraits<SC>::one(), SC beta = Teuchos::ScalarTraits<SC>::zero()) const;
439
441 bool hasTransposeApply() const;
442
443 RCP<MueLu::Hierarchy<SC, LO, GO, NO> > GetHierarchy() const {
444 throw Exceptions::RuntimeError("AMGXOperator does not hold a MueLu::Hierarchy object \n");
445 }
446
447 std::string filterValueToString(const Teuchos::ParameterEntry& entry) {
448 return (entry.isList() ? std::string("...") : toString(entry.getAny()));
449 }
450
451 int sizeA() {
452 int sizeX, sizeY, n;
453 AMGX_matrix_get_size(A_, &n, &sizeX, &sizeY);
454 return n;
455 }
456
457 int iters() {
458 int it;
459 AMGX_solver_get_iterations_number(Solver_, &it);
460 return it;
461 }
462
463 AMGX_SOLVE_STATUS getStatus() {
464 AMGX_SOLVE_STATUS status;
465 AMGX_solver_get_status(Solver_, &status);
466 return status;
467 }
468
469 private:
470 AMGX_solver_handle Solver_;
471 AMGX_resources_handle Resources_;
472 AMGX_config_handle Config_;
473 AMGX_matrix_handle A_;
474 AMGX_vector_handle X_;
475 AMGX_vector_handle Y_;
476 int N_;
477
478 RCP<const Map> domainMap_;
479 RCP<const Map> rangeMap_;
480
481 std::vector<int> muelu2amgx_;
482
483 RCP<Teuchos::Time> vectorTimer1_;
484 RCP<Teuchos::Time> vectorTimer2_;
485 RCP<Teuchos::Time> solverTimer_;
486};
487
488} // namespace MueLu
489
490#endif // HAVE_MUELU_AMGX
491#endif // MUELU_AMGXOPERATOR_DECL_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
void printMaps(Teuchos::RCP< const Teuchos::Comm< int > > &comm, const std::vector< std::vector< int > > &vec, const std::vector< int > &perm, const int *nbrs, const Map &map, const std::string &label)
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &inA, Teuchos::ParameterList &paramListIn)
std::string filterValueToString(const Teuchos::ParameterEntry &entry)
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
virtual ~AMGXOperator()
Destructor.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Returns a solution for the linear system AX=Y in the Tpetra::MultiVector X.
Teuchos::RCP< const Map > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
AMGXOperator(const Teuchos::RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > &InA, Teuchos::ParameterList &paramListIn)
Constructor.
Tpetra::Map< LO, GO, NO > Map
RCP< MueLu::Hierarchy< SC, LO, GO, NO > > GetHierarchy() const
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
Tpetra::MultiVector< SC, LO, GO, NO > MultiVector
Teuchos::RCP< const Map > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Base class for MueLu classes.
Exception throws to report errors in the internal logical of the program.
Namespace for MueLu classes and methods.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.