Xpetra Version of the Day
Loading...
Searching...
No Matches
Xpetra_MatrixMatrix_decl.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Xpetra: A linear algebra interface package
4//
5// Copyright 2012 NTESS and the Xpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_DECL_HPP_
11#define PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_DECL_HPP_
12
13#include "Xpetra_ConfigDefs.hpp"
14
15#include "Xpetra_BlockedCrsMatrix.hpp"
16#include "Xpetra_CrsMatrixWrap.hpp"
17#include "Xpetra_MapExtractor.hpp"
18#include "Xpetra_Map.hpp"
19#include "Xpetra_MatrixFactory.hpp"
20#include "Xpetra_Matrix.hpp"
21#include "Xpetra_StridedMapFactory.hpp"
22#include "Xpetra_StridedMap.hpp"
23
24#include "Xpetra_Helpers.hpp"
25
26#ifdef HAVE_XPETRA_EPETRA
28#endif
29
30#ifdef HAVE_XPETRA_EPETRAEXT
31#include <EpetraExt_MatrixMatrix.h>
32#include <EpetraExt_RowMatrixOut.h>
33#include <Epetra_RowMatrixTransposer.h>
34#endif // HAVE_XPETRA_EPETRAEXT
35
36#ifdef HAVE_XPETRA_TPETRA
37#include <TpetraExt_MatrixMatrix.hpp>
38#include <Tpetra_RowMatrixTransposer.hpp>
39#include <MatrixMarket_Tpetra.hpp>
40#include <Xpetra_TpetraCrsMatrix.hpp>
41#include <Xpetra_TpetraBlockCrsMatrix.hpp>
42#include <Tpetra_BlockCrsMatrix_Helpers.hpp>
43#include <Xpetra_TpetraMultiVector.hpp>
44#include <Xpetra_TpetraVector.hpp>
45#endif // HAVE_XPETRA_TPETRA
46
47namespace Xpetra {
48
49template <class Scalar,
50 class LocalOrdinal /*= int*/,
51 class GlobalOrdinal /*= LocalOrdinal*/,
52 class Node /*= Tpetra::KokkosClassic::DefaultNode::DefaultNodeType*/>
53class MatrixMatrix {
54#undef XPETRA_MATRIXMATRIX_SHORT
56
57 public:
59 In a parallel setting, A and B need not have matching distributions,
60 but C needs to have the same row-map as A (if transposeA is false).
61 At this time C=AT*B and C=A*BT are known to not work. However,
62 C=A*B and C=AT*BT are known to work, Kurtis Nusbaum 03/24/2011
63
64 @param A Input, must already have had 'FillComplete()' called.
65 @param transposeA Input, whether to use transpose of matrix A.
66 @param B Input, must already have had 'FillComplete()' called.
67 @param transposeB Input, whether to use transpose of matrix B.
68 @param C Result. On entry to this method, it doesn't matter whether
69 FillComplete() has already been called on C or not. If it has,
70 then C's graph must already contain all nonzero locations that
71 will be produced when forming the product A*B. On exit,
72 C.FillComplete() will have been called, unless the last argument
73 to this function is specified to be false.
74 @param call_FillComplete_on_result Optional argument, defaults to true.
75 Power users may specify this argument to be false if they *DON'T*
76 want this function to call C.FillComplete. (It is often useful
77 to allow this function to call C.FillComplete, in cases where
78 one or both of the input matrices are rectangular and it is not
79 trivial to know which maps to use for the domain- and range-maps.)
80
81*/
82 static void Multiply(const Matrix& A, bool transposeA,
83 const Matrix& B, bool transposeB,
84 Matrix& C,
85 bool call_FillComplete_on_result = true,
86 bool doOptimizeStorage = true,
87 const std::string& label = std::string(),
88 const RCP<ParameterList>& params = null);
89
112 static RCP<Matrix> Multiply(const Matrix& A, bool transposeA, const Matrix& B, bool transposeB, RCP<Matrix> C_in,
114 bool doFillComplete = true,
115 bool doOptimizeStorage = true,
116 const std::string& label = std::string(),
117 const RCP<ParameterList>& params = null);
118
129 static RCP<Matrix> Multiply(const Matrix& A, bool transposeA, const Matrix& B, bool transposeB, Teuchos::FancyOStream& fos,
130 bool callFillCompleteOnResult = true, bool doOptimizeStorage = true, const std::string& label = std::string(),
131 const RCP<ParameterList>& params = null);
132
133#ifdef HAVE_XPETRA_EPETRAEXT
134 // Michael Gee's MLMultiply
135 static RCP<Epetra_CrsMatrix> MLTwoMatrixMultiply(const Epetra_CrsMatrix& epA,
136 const Epetra_CrsMatrix& epB,
138#endif // ifdef HAVE_XPETRA_EPETRAEXT
142 Returns RCP to non-constant Xpetra::BlockedCrsMatrix.
144 @param A left matrix
145 @param transposeA if true, use the transpose of A
146 @param B right matrix
147 @param transposeB if true, use the transpose of B
148 @param doOptimizeStorage if true, the resulting matrix should be fillComplete'd
149 */
150 static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
151 const BlockedCrsMatrix& B, bool transposeB,
153 bool doFillComplete = true,
154 bool doOptimizeStorage = true);
158
168 static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta);
169
177 @param beta scalar multiplier for B, defaults to 1.0
178 @param C resulting sum
179 @param fos output stream for printing to screen
180 @param AHasFixedNnzPerRow
181
182 It is up to the caller to ensure that the resulting matrix sum is fillComplete'd.
183 */
184 static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
185 const Matrix& B, bool transposeB, const SC& beta,
186 RCP<Matrix>& C, Teuchos::FancyOStream& fos, bool AHasFixedNnzPerRow = false);
187
188}; // class MatrixMatrix
189
190#ifdef HAVE_XPETRA_EPETRA
191// specialization MatrixMatrix for SC=double, LO=GO=int
192template <>
193class MatrixMatrix<double, int, int, EpetraNode> {
194 typedef double Scalar;
195 typedef int LocalOrdinal;
196 typedef int GlobalOrdinal;
199
200 public:
210 @param transposeB Input, whether to use transpose of matrix B.
211 @param C Result. On entry to this method, it doesn't matter whether
212 FillComplete() has already been called on C or not. If it has,
213 then C's graph must already contain all nonzero locations that
214 will be produced when forming the product A*B. On exit,
215 C.FillComplete() will have been called, unless the last argument
216 to this function is specified to be false.
217 @param call_FillComplete_on_result Optional argument, defaults to true.
218 Power users may specify this argument to be false if they *DON'T*
219 want this function to call C.FillComplete. (It is often useful
220 to allow this function to call C.FillComplete, in cases where
221 one or both of the input matrices are rectangular and it is not
222 trivial to know which maps to use for the domain- and range-maps.)
225 static void Multiply(const Matrix& A, bool transposeA,
226 const Matrix& B, bool transposeB,
227 Matrix& C,
228 bool call_FillComplete_on_result = true,
229 bool doOptimizeStorage = true,
230 const std::string& label = std::string(),
231 const RCP<ParameterList>& params = null) {
232 TEUCHOS_TEST_FOR_EXCEPTION(transposeA == false && C.getRowMap()->isSameAs(*A.getRowMap()) == false,
233 Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
234 TEUCHOS_TEST_FOR_EXCEPTION(transposeA == true && C.getRowMap()->isSameAs(*A.getDomainMap()) == false,
235 Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
236
239
240 bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;
241
242 using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
243
244 if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
245#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
246 helpers::epetraExtMult(A, transposeA, B, transposeB, C, haveMultiplyDoFillComplete);
247#else
248 throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled."));
249#endif
250 } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
251#ifdef HAVE_XPETRA_TPETRA
252#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
253 (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
254 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra <double,int,int> ETI enabled."));
255#else
256 if (helpers::isTpetraCrs(A) && helpers::isTpetraCrs(B) && helpers::isTpetraCrs(C)) {
257 // All matrices are Crs
258 const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = helpers::Op2TpetraCrs(A);
259 const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = helpers::Op2TpetraCrs(B);
260 Tpetra::CrsMatrix<SC, LO, GO, NO>& tpC = helpers::Op2NonConstTpetraCrs(C);
261
262 // 18Feb2013 JJH I'm reenabling the code that allows the matrix matrix multiply to do the fillComplete.
263 // Previously, Tpetra's matrix matrix multiply did not support fillComplete.
264 Tpetra::MatrixMatrix::Multiply(tpA, transposeA, tpB, transposeB, tpC, haveMultiplyDoFillComplete, label, params);
265 } else if (helpers::isTpetraBlockCrs(A) && helpers::isTpetraBlockCrs(B)) {
266 // All matrices are BlockCrs (except maybe Ac)
267 // FIXME: For the moment we're just going to clobber the innards of Ac, so no reuse. Once we have a reuse kernel,
268 // we'll need to think about refactoring BlockCrs so we can do something smarter here.
269
270 if (!A.getRowMap()->getComm()->getRank())
271 std::cout << "WARNING: Using inefficient BlockCrs Multiply Placeholder" << std::endl;
272
273 const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(A);
274 const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(B);
275 using CRS = Tpetra::CrsMatrix<SC, LO, GO, NO>;
276 RCP<const CRS> Acrs = Tpetra::convertToCrsMatrix(tpA);
277 RCP<const CRS> Bcrs = Tpetra::convertToCrsMatrix(tpB);
278
279 // We need the global constants to do the copy back to BlockCrs
280 RCP<ParameterList> new_params;
281 if (!params.is_null()) {
282 new_params = rcp(new Teuchos::ParameterList(*params));
283 new_params->set("compute global constants", true);
284 }
285
286 // FIXME: The lines below only works because we're assuming Ac is Point
287 RCP<CRS> tempAc = Teuchos::rcp(new CRS(Acrs->getRowMap(), 0));
288 Tpetra::MatrixMatrix::Multiply(*Acrs, transposeA, *Bcrs, transposeB, *tempAc, haveMultiplyDoFillComplete, label, new_params);
289
290 // Temporary output matrix
291 RCP<Tpetra::BlockCrsMatrix<SC, LO, GO, NO> > Ac_t = Tpetra::convertToBlockCrsMatrix(*tempAc, A.GetStorageBlockSize());
294
295 // We can now cheat and replace the innards of Ac
296 RCP<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> > Ac_w = Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> >(Teuchos::rcpFromRef(C));
297 Ac_w->replaceCrsMatrix(Ac_p);
298 } else {
299 // Mix and match
300 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Mix-and-match Crs/BlockCrs Multiply not currently supported");
301 }
302#endif
303#else
304 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
305#endif
306 }
307
308 if (call_FillComplete_on_result && !haveMultiplyDoFillComplete) {
310 fillParams->set("Optimize Storage", doOptimizeStorage);
311 C.fillComplete((transposeB) ? B.getRangeMap() : B.getDomainMap(),
312 (transposeA) ? A.getDomainMap() : A.getRangeMap(),
313 fillParams);
314 }
315
316 // transfer striding information
317 RCP<Matrix> rcpA = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(A));
318 RCP<Matrix> rcpB = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(B));
319 C.CreateView("stridedMaps", rcpA, transposeA, rcpB, transposeB); // TODO use references instead of RCPs
320 } // end Multiply
321
344 static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
345 const Matrix& B, bool transposeB,
346 RCP<Matrix> C_in,
348 bool doFillComplete = true,
349 bool doOptimizeStorage = true,
350 const std::string& label = std::string(),
351 const RCP<ParameterList>& params = null) {
354
355 // Optimization using ML Multiply when available and requested
356 // This feature is currently not supported. We would have to introduce the HAVE_XPETRA_ML_MMM flag
357#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT) && defined(HAVE_XPETRA_ML_MMM)
358 if (B.getDomainMap()->lib() == Xpetra::UseEpetra && !transposeA && !transposeB) {
359 RCP<const Epetra_CrsMatrix> epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(rcpFromRef(A));
360 RCP<const Epetra_CrsMatrix> epB = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(rcpFromRef(B));
361 RCP<Epetra_CrsMatrix> epC = MLTwoMatrixMultiply(*epA, *epB, fos);
362
364 if (doFillComplete) {
366 fillParams->set("Optimize Storage", doOptimizeStorage);
367 C->fillComplete(B.getDomainMap(), A.getRangeMap(), fillParams);
368 }
369
370 // Fill strided maps information
371 // This is necessary since the ML matrix matrix multiplication routine has no handling for this
372 // TODO: move this call to MLMultiply...
373 C->CreateView("stridedMaps", rcpFromRef(A), transposeA, rcpFromRef(B), transposeB);
374
375 return C;
376 }
377#endif // EPETRA + EPETRAEXT + ML
378
379 // Default case: Xpetra Multiply
380 RCP<Matrix> C = C_in;
381
382 if (C == Teuchos::null) {
383 double nnzPerRow = Teuchos::as<double>(0);
384
385#if 0
386 if (A.getDomainMap()->lib() == Xpetra::UseTpetra) {
387 // For now, follow what ML and Epetra do.
388 GO numRowsA = A.getGlobalNumRows();
389 GO numRowsB = B.getGlobalNumRows();
390 nnzPerRow = sqrt(Teuchos::as<double>(A.getGlobalNumEntries())/numRowsA) +
391 sqrt(Teuchos::as<double>(B.getGlobalNumEntries())/numRowsB) - 1;
392 nnzPerRow *= nnzPerRow;
393 double totalNnz = nnzPerRow * A.getGlobalNumRows() * 0.75 + 100;
394 double minNnz = Teuchos::as<double>(1.2 * A.getGlobalNumEntries());
395 if (totalNnz < minNnz)
396 totalNnz = minNnz;
397 nnzPerRow = totalNnz / A.getGlobalNumRows();
398
399 fos << "Matrix product nnz per row estimate = " << Teuchos::as<LO>(nnzPerRow) << std::endl;
400 }
401#endif
402
403 if (transposeA)
404 C = MatrixFactory::Build(A.getDomainMap(), Teuchos::as<LO>(nnzPerRow));
405 else
406 C = MatrixFactory::Build(A.getRowMap(), Teuchos::as<LO>(nnzPerRow));
407
408 } else {
409 C->resumeFill(); // why this is not done inside of Tpetra MxM?
410 fos << "Reuse C pattern" << std::endl;
411 }
412
413 Multiply(A, transposeA, B, transposeB, *C, doFillComplete, doOptimizeStorage, label, params); // call Multiply routine from above
414
415 return C;
416 }
417
428 static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
429 const Matrix& B, bool transposeB,
431 bool callFillCompleteOnResult = true,
432 bool doOptimizeStorage = true,
433 const std::string& label = std::string(),
434 const RCP<ParameterList>& params = null) {
435 return Multiply(A, transposeA, B, transposeB, Teuchos::null, fos, callFillCompleteOnResult, doOptimizeStorage, label, params);
436 }
437
438#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
439 // Michael Gee's MLMultiply
441 const Epetra_CrsMatrix& epB,
443#if defined(HAVE_XPETRA_ML_MMM) // Note: this is currently not supported
444 ML_Comm* comm;
445 ML_Comm_Create(&comm);
446 fos << "****** USING ML's MATRIX MATRIX MULTIPLY ******" << std::endl;
447#ifdef HAVE_MPI
448 // ML_Comm uses MPI_COMM_WORLD, so try to use the same communicator as epA.
449 const Epetra_MpiComm* Mcomm = dynamic_cast<const Epetra_MpiComm*>(&(epA.Comm()));
450 if (Mcomm)
451 ML_Comm_Set_UsrComm(comm, Mcomm->GetMpiComm());
452#endif
453 // in order to use ML, there must be no indices missing from the matrix column maps.
454 EpetraExt::CrsMatrix_SolverMap Atransform;
455 EpetraExt::CrsMatrix_SolverMap Btransform;
456 const Epetra_CrsMatrix& A = Atransform(const_cast<Epetra_CrsMatrix&>(epA));
457 const Epetra_CrsMatrix& B = Btransform(const_cast<Epetra_CrsMatrix&>(epB));
458
459 if (!A.Filled()) throw Exceptions::RuntimeError("A has to be FillCompleted");
460 if (!B.Filled()) throw Exceptions::RuntimeError("B has to be FillCompleted");
461
462 // create ML operators from EpetraCrsMatrix
463 ML_Operator* ml_As = ML_Operator_Create(comm);
464 ML_Operator* ml_Bs = ML_Operator_Create(comm);
465 ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&A), ml_As); // Should we test if the lightweight wrapper is actually used or if WrapEpetraCrsMatrix fall backs to the heavy one?
466 ML_Operator_WrapEpetraCrsMatrix(const_cast<Epetra_CrsMatrix*>(&B), ml_Bs);
467 ML_Operator* ml_AtimesB = ML_Operator_Create(comm);
468 {
470 ML_2matmult(ml_As, ml_Bs, ml_AtimesB, ML_CSR_MATRIX); // do NOT use ML_EpetraCRS_MATRIX!!!
471 }
472 ML_Operator_Destroy(&ml_As);
473 ML_Operator_Destroy(&ml_Bs);
474
475 // For ml_AtimesB we have to reconstruct the column map in global indexing,
476 // The following is going down to the salt-mines of ML ...
477 // note: we use integers, since ML only works for Epetra...
478 int N_local = ml_AtimesB->invec_leng;
479 ML_CommInfoOP* getrow_comm = ml_AtimesB->getrow->pre_comm;
480 if (!getrow_comm) throw(Exceptions::RuntimeError("ML_Operator does not have a CommInfo"));
481 ML_Comm* comm_AB = ml_AtimesB->comm; // get comm object
482 if (N_local != B.DomainMap().NumMyElements())
483 throw(Exceptions::RuntimeError("Mismatch in local row dimension between ML and Epetra"));
484 int N_rcvd = 0;
485 int N_send = 0;
486 int flag = 0;
487 for (int i = 0; i < getrow_comm->N_neighbors; i++) {
488 N_rcvd += (getrow_comm->neighbors)[i].N_rcv;
489 N_send += (getrow_comm->neighbors)[i].N_send;
490 if (((getrow_comm->neighbors)[i].N_rcv != 0) &&
491 ((getrow_comm->neighbors)[i].rcv_list != NULL)) flag = 1;
492 }
493 // For some unknown reason, ML likes to have stuff one larger than
494 // neccessary...
495 std::vector<double> dtemp(N_local + N_rcvd + 1); // "double" vector for comm function
496 std::vector<int> cmap(N_local + N_rcvd + 1); // vector for gids
497 for (int i = 0; i < N_local; ++i) {
498 cmap[i] = B.DomainMap().GID(i);
499 dtemp[i] = (double)cmap[i];
500 }
501 ML_cheap_exchange_bdry(&dtemp[0], getrow_comm, N_local, N_send, comm_AB); // do communication
502 if (flag) { // process received data
503 int count = N_local;
504 const int neighbors = getrow_comm->N_neighbors;
505 for (int i = 0; i < neighbors; i++) {
506 const int nrcv = getrow_comm->neighbors[i].N_rcv;
507 for (int j = 0; j < nrcv; j++)
508 cmap[getrow_comm->neighbors[i].rcv_list[j]] = (int)dtemp[count++];
509 }
510 } else {
511 for (int i = 0; i < N_local + N_rcvd; ++i)
512 cmap[i] = (int)dtemp[i];
513 }
514 dtemp.clear(); // free double array
515
516 // we can now determine a matching column map for the result
517 Epetra_Map gcmap(-1, N_local + N_rcvd, &cmap[0], B.ColMap().IndexBase(), A.Comm());
518
519 int allocated = 0;
520 int rowlength;
521 double* val = NULL;
522 int* bindx = NULL;
523
524 const int myrowlength = A.RowMap().NumMyElements();
525 const Epetra_Map& rowmap = A.RowMap();
526
527 // Determine the maximum bandwith for the result matrix.
528 // replaces the old, very(!) memory-consuming guess:
529 // int guessnpr = A.MaxNumEntries()*B.MaxNumEntries();
530 int educatedguess = 0;
531 for (int i = 0; i < myrowlength; ++i) {
532 // get local row
533 ML_get_matrix_row(ml_AtimesB, 1, &i, &allocated, &bindx, &val, &rowlength, 0);
534 if (rowlength > educatedguess)
535 educatedguess = rowlength;
536 }
537
538 // allocate our result matrix and fill it
539 RCP<Epetra_CrsMatrix> result = rcp(new Epetra_CrsMatrix(::Copy, A.RangeMap(), gcmap, educatedguess, false));
540
541 std::vector<int> gcid(educatedguess);
542 for (int i = 0; i < myrowlength; ++i) {
543 const int grid = rowmap.GID(i);
544 // get local row
545 ML_get_matrix_row(ml_AtimesB, 1, &i, &allocated, &bindx, &val, &rowlength, 0);
546 if (!rowlength) continue;
547 if ((int)gcid.size() < rowlength) gcid.resize(rowlength);
548 for (int j = 0; j < rowlength; ++j) {
549 gcid[j] = gcmap.GID(bindx[j]);
550 if (gcid[j] < 0)
551 throw Exceptions::RuntimeError("Error: cannot find gcid!");
552 }
553 int err = result->InsertGlobalValues(grid, rowlength, val, &gcid[0]);
554 if (err != 0 && err != 1) {
555 std::ostringstream errStr;
556 errStr << "Epetra_CrsMatrix::InsertGlobalValues returned err=" << err;
557 throw Exceptions::RuntimeError(errStr.str());
558 }
559 }
560 // free memory
561 if (bindx) ML_free(bindx);
562 if (val) ML_free(val);
563 ML_Operator_Destroy(&ml_AtimesB);
564 ML_Comm_Destroy(&comm);
565
566 return result;
567#else // no MUELU_ML
568 (void)epA;
569 (void)epB;
570 (void)fos;
572 "No ML multiplication available. This feature is currently not supported by Xpetra.");
573 TEUCHOS_UNREACHABLE_RETURN(Teuchos::null);
574#endif
575 }
576#endif // ifdef HAVE_XPETRA_EPETRAEXT
577
588 static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
589 const BlockedCrsMatrix& B, bool transposeB,
591 bool doFillComplete = true,
592 bool doOptimizeStorage = true) {
594 "TwoMatrixMultiply for BlockedCrsMatrix not implemented for transposeA==true or transposeB==true");
595
596 // Preconditions
599
602
603 RCP<BlockedCrsMatrix> C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
604
605 for (size_t i = 0; i < A.Rows(); ++i) { // loop over all block rows of A
606 for (size_t j = 0; j < B.Cols(); ++j) { // loop over all block columns of B
607 RCP<Matrix> Cij;
608
609 for (size_t l = 0; l < B.Rows(); ++l) { // loop for calculating entry C_{ij}
610 RCP<Matrix> crmat1 = A.getMatrix(i, l);
611 RCP<Matrix> crmat2 = B.getMatrix(l, j);
612
613 if (crmat1.is_null() || crmat2.is_null())
614 continue;
615
616 // try unwrapping 1x1 blocked matrix
617 {
618 auto unwrap1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
619 auto unwrap2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
620
621 if (unwrap1.is_null() != unwrap2.is_null()) {
622 if (unwrap1 != Teuchos::null && unwrap1->Rows() == 1 && unwrap1->Cols() == 1)
623 crmat1 = unwrap1->getCrsMatrix();
624 if (unwrap2 != Teuchos::null && unwrap2->Rows() == 1 && unwrap2->Cols() == 1)
625 crmat2 = unwrap2->getCrsMatrix();
626 }
627 }
628
629 RCP<CrsMatrixWrap> crop1 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat1);
630 RCP<CrsMatrixWrap> crop2 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat2);
632 "A and B must be either both (compatible) BlockedCrsMatrix objects or both CrsMatrixWrap objects.");
633
634 // Forcibly compute the global constants if we don't have them (only works for real CrsMatrices, not nested blocks)
635 if (!crop1.is_null())
636 Teuchos::rcp_const_cast<CrsGraph>(crmat1->getCrsGraph())->computeGlobalConstants();
637 if (!crop2.is_null())
638 Teuchos::rcp_const_cast<CrsGraph>(crmat2->getCrsGraph())->computeGlobalConstants();
639
640 TEUCHOS_TEST_FOR_EXCEPTION(!crmat1->haveGlobalConstants(), Exceptions::RuntimeError,
641 "crmat1 does not have global constants");
642 TEUCHOS_TEST_FOR_EXCEPTION(!crmat2->haveGlobalConstants(), Exceptions::RuntimeError,
643 "crmat2 does not have global constants");
644
645 if (crmat1->getGlobalNumEntries() == 0 || crmat2->getGlobalNumEntries() == 0)
646 continue;
647
648 // temporary matrix containing result of local block multiplication
649 RCP<Matrix> temp = Teuchos::null;
650
651 if (crop1 != Teuchos::null && crop2 != Teuchos::null)
652 temp = Multiply(*crop1, false, *crop2, false, fos);
653 else {
654 RCP<BlockedCrsMatrix> bop1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
655 RCP<BlockedCrsMatrix> bop2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
656 TEUCHOS_TEST_FOR_EXCEPTION(bop1.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
657 TEUCHOS_TEST_FOR_EXCEPTION(bop2.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
658 TEUCHOS_TEST_FOR_EXCEPTION(bop1->Cols() != bop2->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << bop1->Cols() << " columns and B has " << bop2->Rows() << " rows. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
659 TEUCHOS_TEST_FOR_EXCEPTION(bop1->getDomainMap()->isSameAs(*(bop2->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
660
661 // recursive multiplication call
662 temp = TwoMatrixMultiplyBlock(*bop1, transposeA, *bop2, transposeB, fos, doFillComplete, doOptimizeStorage);
663
664 RCP<BlockedCrsMatrix> btemp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(temp);
665 TEUCHOS_TEST_FOR_EXCEPTION(btemp->Rows() != bop1->Rows(), Xpetra::Exceptions::RuntimeError, "Number of block rows of local blocked operator is " << btemp->Rows() << " but should be " << bop1->Rows() << ". (TwoMatrixMultiplyBlock)");
666 TEUCHOS_TEST_FOR_EXCEPTION(btemp->Cols() != bop2->Cols(), Xpetra::Exceptions::RuntimeError, "Number of block cols of local blocked operator is " << btemp->Cols() << " but should be " << bop2->Cols() << ". (TwoMatrixMultiplyBlock)");
667 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getFullMap()->isSameAs(*(bop1->getRangeMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of local blocked operator should be same as first operator. (TwoMatrixMultiplyBlock)");
668 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getFullMap()->isSameAs(*(bop2->getDomainMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of local blocked operator should be same as second operator. (TwoMatrixMultiplyBlock)");
669 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getThyraMode() != bop1->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local range map extractor incompatible with range map extractor of A (TwoMatrixMultiplyBlock)");
670 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getThyraMode() != bop2->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local domain map extractor incompatible with domain map extractor of B (TwoMatrixMultiplyBlock)");
671 }
672
673 TEUCHOS_TEST_FOR_EXCEPTION(temp->isFillComplete() == false, Xpetra::Exceptions::RuntimeError, "Local block is not filled. (TwoMatrixMultiplyBlock)");
674
675 RCP<Matrix> addRes = null;
676 if (Cij.is_null())
677 Cij = temp;
678 else {
679 MatrixMatrix::TwoMatrixAdd(*temp, false, 1.0, *Cij, false, 1.0, addRes, fos);
680 Cij = addRes;
681 }
682 }
683
684 if (!Cij.is_null()) {
685 if (Cij->isFillComplete())
686 Cij->resumeFill();
687 Cij->fillComplete(B.getDomainMap(j), A.getRangeMap(i));
688 C->setMatrix(i, j, Cij);
689 } else {
690 C->setMatrix(i, j, Teuchos::null);
691 }
692 }
693 }
694
695 if (doFillComplete)
696 C->fillComplete(); // call default fillComplete for BlockCrsMatrixWrap objects
697
698 return C;
699 } // TwoMatrixMultiplyBlock
700
713 static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta) {
715 "TwoMatrixAdd: matrix row maps are not the same.");
716
717 if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
718#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
719 const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(A);
720 Epetra_CrsMatrix& epB = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstEpetraCrs(B);
721
722 // FIXME is there a bug if beta=0?
723 int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, beta);
724
725 if (rv != 0)
726 throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value " + Teuchos::toString(rv));
727 std::ostringstream buf;
728#else
729 throw Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt.");
730#endif
731 } else if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
732#ifdef HAVE_XPETRA_TPETRA
733#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
734 (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
735 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=int enabled."));
736#else
737 const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(A);
738 Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstTpetraCrs(B);
739
740 Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, beta);
741#endif
742#else
743 throw Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
744#endif
745 }
746 } // MatrixMatrix::TwoMatrixAdd()
747
762 static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
763 const Matrix& B, bool transposeB, const SC& beta,
764 RCP<Matrix>& C, Teuchos::FancyOStream& fos, bool AHasFixedNnzPerRow = false) {
765 using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
766 RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
767 RCP<const Matrix> rcpB = Teuchos::rcpFromRef(B);
768 RCP<const BlockedCrsMatrix> rcpBopA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpA);
769 RCP<const BlockedCrsMatrix> rcpBopB = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpB);
770
771 if (rcpBopA == Teuchos::null && rcpBopB == Teuchos::null) {
772 if (!(A.getRowMap()->isSameAs(*(B.getRowMap()))))
773 throw Exceptions::Incompatible("TwoMatrixAdd: matrix row maps are not the same.");
774
775 auto lib = A.getRowMap()->lib();
776 if (lib == Xpetra::UseEpetra) {
777#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
778 const Epetra_CrsMatrix& epA = helpers::Op2EpetraCrs(A);
779 const Epetra_CrsMatrix& epB = helpers::Op2EpetraCrs(B);
780 if (C.is_null()) {
781 size_t maxNzInA = 0;
782 size_t maxNzInB = 0;
783 size_t numLocalRows = 0;
784 if (A.isFillComplete() && B.isFillComplete()) {
785 maxNzInA = A.getLocalMaxNumRowEntries();
786 maxNzInB = B.getLocalMaxNumRowEntries();
787 numLocalRows = A.getLocalNumRows();
788 }
789
790 if (maxNzInA == 1 || maxNzInB == 1 || AHasFixedNnzPerRow) {
791 // first check if either A or B has at most 1 nonzero per row
792 // the case of both having at most 1 nz per row is handled by the ``else''
793 Teuchos::ArrayRCP<size_t> exactNnzPerRow(numLocalRows);
794
795 if ((maxNzInA == 1 && maxNzInB > 1) || AHasFixedNnzPerRow) {
796 for (size_t i = 0; i < numLocalRows; ++i)
797 exactNnzPerRow[i] = B.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInA;
798
799 } else {
800 for (size_t i = 0; i < numLocalRows; ++i)
801 exactNnzPerRow[i] = A.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInB;
802 }
803
804 fos << "MatrixMatrix::TwoMatrixAdd : special case detected (one matrix has a fixed nnz per row)"
805 << ", using static profiling" << std::endl;
806 C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), exactNnzPerRow));
807
808 } else {
809 // general case
810 LO maxPossibleNNZ = A.getLocalMaxNumRowEntries() + B.getLocalMaxNumRowEntries();
811 C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), maxPossibleNNZ));
812 }
813 if (transposeB)
814 fos << "MatrixMatrix::TwoMatrixAdd : ** WARNING ** estimate could be badly wrong because second summand is transposed" << std::endl;
815 }
816 RCP<Epetra_CrsMatrix> epC = helpers::Op2NonConstEpetraCrs(C);
817 Epetra_CrsMatrix* ref2epC = &*epC; // to avoid a compiler error...
818
819 // FIXME is there a bug if beta=0?
820 int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, transposeB, beta, ref2epC);
821
822 if (rv != 0)
823 throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value of " + Teuchos::toString(rv));
824#else
825 throw Exceptions::RuntimeError("MueLu must be compile with EpetraExt.");
826#endif
827 } else if (lib == Xpetra::UseTpetra) {
828#ifdef HAVE_XPETRA_TPETRA
829#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_INT))) || \
830 (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_INT))))
831 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=int enabled."));
832#else
833 using tcrs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NO>;
834 const tcrs_matrix_type& tpA = helpers::Op2TpetraCrs(A);
835 const tcrs_matrix_type& tpB = helpers::Op2TpetraCrs(B);
836 C = helpers::tpetraAdd(tpA, transposeA, alpha, tpB, transposeB, beta);
837#endif
838#else
839 throw Exceptions::RuntimeError("Xpetra must be compile with Tpetra.");
840#endif
841 }
842
844 if (A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(A));
845 if (B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(B));
847 }
848 // the first matrix is of type CrsMatrixWrap, the second is a blocked operator
849 else if (rcpBopA == Teuchos::null && rcpBopB != Teuchos::null) {
850 RCP<const MapExtractor> rgmapextractor = rcpBopB->getRangeMapExtractor();
851 RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
852
853 C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
854 RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
855
856 size_t i = 0;
857 for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
858 RCP<Matrix> Cij = Teuchos::null;
859 if (rcpA != Teuchos::null &&
860 rcpBopB->getMatrix(i, j) != Teuchos::null) {
861 // recursive call
862 TwoMatrixAdd(*rcpA, transposeA, alpha,
863 *(rcpBopB->getMatrix(i, j)), transposeB, beta,
864 Cij, fos, AHasFixedNnzPerRow);
865 } else if (rcpA == Teuchos::null &&
866 rcpBopB->getMatrix(i, j) != Teuchos::null) {
867 Cij = rcpBopB->getMatrix(i, j);
868 } else if (rcpA != Teuchos::null &&
869 rcpBopB->getMatrix(i, j) == Teuchos::null) {
870 Cij = Teuchos::rcp_const_cast<Matrix>(rcpA);
871 } else {
872 Cij = Teuchos::null;
873 }
874
875 if (!Cij.is_null()) {
876 if (Cij->isFillComplete())
877 Cij->resumeFill();
878 Cij->fillComplete();
879 bC->setMatrix(i, j, Cij);
880 } else {
881 bC->setMatrix(i, j, Teuchos::null);
882 }
883 } // loop over columns j
884 }
885 // the second matrix is of type CrsMatrixWrap, the first is a blocked operator
886 else if (rcpBopA != Teuchos::null && rcpBopB == Teuchos::null) {
887 RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
888 RCP<const MapExtractor> domapextractor = rcpBopA->getDomainMapExtractor();
889
890 C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
891 RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
892
893 size_t j = 0;
894 for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
895 RCP<Matrix> Cij = Teuchos::null;
896 if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
897 rcpB != Teuchos::null) {
898 // recursive call
899 TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
900 *rcpB, transposeB, beta,
901 Cij, fos, AHasFixedNnzPerRow);
902 } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
903 rcpB != Teuchos::null) {
904 Cij = Teuchos::rcp_const_cast<Matrix>(rcpB);
905 } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
906 rcpB == Teuchos::null) {
907 Cij = rcpBopA->getMatrix(i, j);
908 } else {
909 Cij = Teuchos::null;
910 }
911
912 if (!Cij.is_null()) {
913 if (Cij->isFillComplete())
914 Cij->resumeFill();
915 Cij->fillComplete();
916 bC->setMatrix(i, j, Cij);
917 } else {
918 bC->setMatrix(i, j, Teuchos::null);
919 }
920 } // loop over rows i
921 } else {
922 // This is the version for blocked matrices
923
924 // check the compatibility of the blocked operators
925 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixAdd)");
926 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopB.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixAdd)");
927 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Rows() << " rows and B has " << rcpBopA->Rows() << " rows. Matrices are not compatible! (TwoMatrixAdd)");
928 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Cols() << " cols and B has " << rcpBopA->Cols() << " cols. Matrices are not compatible! (TwoMatrixAdd)");
929 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMap()->isSameAs(*(rcpBopB->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixAdd)");
930 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMap()->isSameAs(*(rcpBopB->getDomainMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as domain map of B. Matrices are not compatible! (TwoMatrixAdd)");
931 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMapExtractor()->getThyraMode() != rcpBopB->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in RangeMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
932 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMapExtractor()->getThyraMode() != rcpBopB->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in DomainMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
933
934 RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
935 RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
936
937 C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
938 RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
939
940 for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
941 for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
942
943 RCP<Matrix> Cij = Teuchos::null;
944 if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
945 rcpBopB->getMatrix(i, j) != Teuchos::null) {
946 // recursive call
947
948 TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
949 *(rcpBopB->getMatrix(i, j)), transposeB, beta,
950 Cij, fos, AHasFixedNnzPerRow);
951 } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
952 rcpBopB->getMatrix(i, j) != Teuchos::null) {
953 Cij = rcpBopB->getMatrix(i, j);
954 } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
955 rcpBopB->getMatrix(i, j) == Teuchos::null) {
956 Cij = rcpBopA->getMatrix(i, j);
957 } else {
958 Cij = Teuchos::null;
959 }
960
961 if (!Cij.is_null()) {
962 if (Cij->isFillComplete())
963 Cij->resumeFill();
964 // Cij->fillComplete(rcpBopA->getDomainMap(j), rcpBopA->getRangeMap(i));
965 Cij->fillComplete();
966 bC->setMatrix(i, j, Cij);
967 } else {
968 bC->setMatrix(i, j, Teuchos::null);
969 }
970 } // loop over columns j
971 } // loop over rows i
972
973 } // end blocked recursive algorithm
974 } // MatrixMatrix::TwoMatrixAdd()
975}; // end specialization on SC=double, GO=int and NO=EpetraNode
976
977// specialization MatrixMatrix for SC=double, GO=long long and NO=EptraNode
978template <>
979class MatrixMatrix<double, int, long long, EpetraNode> {
980 typedef double Scalar;
981 typedef int LocalOrdinal;
982 typedef long long GlobalOrdinal;
985
986 public:
1011 static void Multiply(const Matrix& A, bool transposeA,
1012 const Matrix& B, bool transposeB,
1013 Matrix& C,
1014 bool call_FillComplete_on_result = true,
1015 bool doOptimizeStorage = true,
1016 const std::string& label = std::string(),
1017 const RCP<ParameterList>& params = null) {
1018 using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1019 TEUCHOS_TEST_FOR_EXCEPTION(transposeA == false && C.getRowMap()->isSameAs(*A.getRowMap()) == false,
1020 Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as row map of A");
1021 TEUCHOS_TEST_FOR_EXCEPTION(transposeA == true && C.getRowMap()->isSameAs(*A.getDomainMap()) == false,
1022 Xpetra::Exceptions::RuntimeError, "XpetraExt::MatrixMatrix::Multiply: row map of C is not same as domain map of A");
1023
1026
1027 bool haveMultiplyDoFillComplete = call_FillComplete_on_result && doOptimizeStorage;
1028
1029 if (C.getRowMap()->lib() == Xpetra::UseEpetra) {
1030#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1031 helpers::epetraExtMult(A, transposeA, B, transposeB, C, haveMultiplyDoFillComplete);
1032#else
1033 throw(Xpetra::Exceptions::RuntimeError("Xpetra::MatrixMatrix::Multiply requires EpetraExt to be compiled."));
1034#endif
1035 } else if (C.getRowMap()->lib() == Xpetra::UseTpetra) {
1036#ifdef HAVE_XPETRA_TPETRA
1037#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1038 (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1039 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra <double,int,long long, EpetraNode> ETI enabled."));
1040#else
1041 if (helpers::isTpetraCrs(A) && helpers::isTpetraCrs(B) && helpers::isTpetraCrs(C)) {
1042 // All matrices are Crs
1043 const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = helpers::Op2TpetraCrs(A);
1044 const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = helpers::Op2TpetraCrs(B);
1045 Tpetra::CrsMatrix<SC, LO, GO, NO>& tpC = helpers::Op2NonConstTpetraCrs(C);
1046
1047 // 18Feb2013 JJH I'm reenabling the code that allows the matrix matrix multiply to do the fillComplete.
1048 // Previously, Tpetra's matrix matrix multiply did not support fillComplete.
1049 Tpetra::MatrixMatrix::Multiply(tpA, transposeA, tpB, transposeB, tpC, haveMultiplyDoFillComplete, label, params);
1050 } else if (helpers::isTpetraBlockCrs(A) && helpers::isTpetraBlockCrs(B)) {
1051 // All matrices are BlockCrs (except maybe Ac)
1052 // FIXME: For the moment we're just going to clobber the innards of Ac, so no reuse. Once we have a reuse kernel,
1053 // we'll need to think about refactoring BlockCrs so we can do something smarter here.
1054
1055 if (!A.getRowMap()->getComm()->getRank())
1056 std::cout << "WARNING: Using inefficient BlockCrs Multiply Placeholder" << std::endl;
1057
1058 const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(A);
1059 const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraBlockCrs(B);
1060 using CRS = Tpetra::CrsMatrix<SC, LO, GO, NO>;
1061 RCP<const CRS> Acrs = Tpetra::convertToCrsMatrix(tpA);
1062 RCP<const CRS> Bcrs = Tpetra::convertToCrsMatrix(tpB);
1063
1064 // We need the global constants to do the copy back to BlockCrs
1065 RCP<ParameterList> new_params;
1066 if (!params.is_null()) {
1067 new_params = rcp(new Teuchos::ParameterList(*params));
1068 new_params->set("compute global constants", true);
1069 }
1070
1071 // FIXME: The lines below only works because we're assuming Ac is Point
1072 RCP<CRS> tempAc = Teuchos::rcp(new CRS(Acrs->getRowMap(), 0));
1073 Tpetra::MatrixMatrix::Multiply(*Acrs, transposeA, *Bcrs, transposeB, *tempAc, haveMultiplyDoFillComplete, label, new_params);
1074
1075 // Temporary output matrix
1076 RCP<Tpetra::BlockCrsMatrix<SC, LO, GO, NO> > Ac_t = Tpetra::convertToBlockCrsMatrix(*tempAc, A.GetStorageBlockSize());
1079
1080 // We can now cheat and replace the innards of Ac
1081 RCP<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> > Ac_w = Teuchos::rcp_dynamic_cast<Xpetra::CrsMatrixWrap<SC, LO, GO, NO> >(Teuchos::rcpFromRef(C));
1082 Ac_w->replaceCrsMatrix(Ac_p);
1083 } else {
1084 // Mix and match
1085 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Mix-and-match Crs/BlockCrs Multiply not currently supported");
1086 }
1087
1088#endif
1089#else
1090 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra."));
1091#endif
1092 }
1093
1094 if (call_FillComplete_on_result && !haveMultiplyDoFillComplete) {
1096 fillParams->set("Optimize Storage", doOptimizeStorage);
1097 C.fillComplete((transposeB) ? B.getRangeMap() : B.getDomainMap(),
1098 (transposeA) ? A.getDomainMap() : A.getRangeMap(),
1099 fillParams);
1100 }
1101
1102 // transfer striding information
1103 RCP<Matrix> rcpA = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(A));
1104 RCP<Matrix> rcpB = Teuchos::rcp_const_cast<Matrix>(Teuchos::rcpFromRef(B));
1105 C.CreateView("stridedMaps", rcpA, transposeA, rcpB, transposeB); // TODO use references instead of RCPs
1106 } // end Multiply
1107
1130 static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
1131 const Matrix& B, bool transposeB,
1132 RCP<Matrix> C_in,
1134 bool doFillComplete = true,
1135 bool doOptimizeStorage = true,
1136 const std::string& label = std::string(),
1137 const RCP<ParameterList>& params = null) {
1138 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
1139 TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
1140
1141 // Default case: Xpetra Multiply
1142 RCP<Matrix> C = C_in;
1143
1144 if (C == Teuchos::null) {
1145 double nnzPerRow = Teuchos::as<double>(0);
1146
1147#if 0
1148 if (A.getDomainMap()->lib() == Xpetra::UseTpetra) {
1149 // For now, follow what ML and Epetra do.
1150 GO numRowsA = A.getGlobalNumRows();
1151 GO numRowsB = B.getGlobalNumRows();
1152 nnzPerRow = sqrt(Teuchos::as<double>(A.getGlobalNumEntries())/numRowsA) +
1153 sqrt(Teuchos::as<double>(B.getGlobalNumEntries())/numRowsB) - 1;
1154 nnzPerRow *= nnzPerRow;
1155 double totalNnz = nnzPerRow * A.getGlobalNumRows() * 0.75 + 100;
1156 double minNnz = Teuchos::as<double>(1.2 * A.getGlobalNumEntries());
1157 if (totalNnz < minNnz)
1158 totalNnz = minNnz;
1159 nnzPerRow = totalNnz / A.getGlobalNumRows();
1160
1161 fos << "Matrix product nnz per row estimate = " << Teuchos::as<LO>(nnzPerRow) << std::endl;
1162 }
1163#endif
1164 if (transposeA)
1165 C = MatrixFactory::Build(A.getDomainMap(), Teuchos::as<LO>(nnzPerRow));
1166 else
1167 C = MatrixFactory::Build(A.getRowMap(), Teuchos::as<LO>(nnzPerRow));
1168
1169 } else {
1170 C->resumeFill(); // why this is not done inside of Tpetra MxM?
1171 fos << "Reuse C pattern" << std::endl;
1172 }
1173
1174 Multiply(A, transposeA, B, transposeB, *C, doFillComplete, doOptimizeStorage, label, params); // call Multiply routine from above
1175
1176 return C;
1177 }
1178
1189 static RCP<Matrix> Multiply(const Matrix& A, bool transposeA,
1190 const Matrix& B, bool transposeB,
1192 bool callFillCompleteOnResult = true,
1193 bool doOptimizeStorage = true,
1194 const std::string& label = std::string(),
1195 const RCP<ParameterList>& params = null) {
1196 return Multiply(A, transposeA, B, transposeB, Teuchos::null, fos, callFillCompleteOnResult, doOptimizeStorage, label, params);
1197 }
1198
1199#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1200 // Michael Gee's MLMultiply
1202 const Epetra_CrsMatrix& /* epB */,
1203 Teuchos::FancyOStream& /* fos */) {
1205 "No ML multiplication available. This feature is currently not supported by Xpetra.");
1206 TEUCHOS_UNREACHABLE_RETURN(Teuchos::null);
1207 }
1208#endif // ifdef HAVE_XPETRA_EPETRAEXT
1209
1220 static RCP<BlockedCrsMatrix> TwoMatrixMultiplyBlock(const BlockedCrsMatrix& A, bool transposeA,
1221 const BlockedCrsMatrix& B, bool transposeB,
1223 bool doFillComplete = true,
1224 bool doOptimizeStorage = true) {
1225 TEUCHOS_TEST_FOR_EXCEPTION(transposeA || transposeB, Exceptions::RuntimeError,
1226 "TwoMatrixMultiply for BlockedCrsMatrix not implemented for transposeA==true or transposeB==true");
1227
1228 // Preconditions
1229 TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), Exceptions::RuntimeError, "A is not fill-completed");
1230 TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), Exceptions::RuntimeError, "B is not fill-completed");
1231
1232 RCP<const MapExtractor> rgmapextractor = A.getRangeMapExtractor();
1234
1235 RCP<BlockedCrsMatrix> C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1236
1237 for (size_t i = 0; i < A.Rows(); ++i) { // loop over all block rows of A
1238 for (size_t j = 0; j < B.Cols(); ++j) { // loop over all block columns of B
1239 RCP<Matrix> Cij;
1240
1241 for (size_t l = 0; l < B.Rows(); ++l) { // loop for calculating entry C_{ij}
1242 RCP<Matrix> crmat1 = A.getMatrix(i, l);
1243 RCP<Matrix> crmat2 = B.getMatrix(l, j);
1244
1245 if (crmat1.is_null() || crmat2.is_null())
1246 continue;
1247
1248 // try unwrapping 1x1 blocked matrix
1249 {
1250 auto unwrap1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
1251 auto unwrap2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
1252
1253 if (unwrap1.is_null() != unwrap2.is_null()) {
1254 if (unwrap1 != Teuchos::null && unwrap1->Rows() == 1 && unwrap1->Cols() == 1)
1255 crmat1 = unwrap1->getCrsMatrix();
1256 if (unwrap2 != Teuchos::null && unwrap2->Rows() == 1 && unwrap2->Cols() == 1)
1257 crmat2 = unwrap2->getCrsMatrix();
1258 }
1259 }
1260
1261 RCP<CrsMatrixWrap> crop1 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat1);
1262 RCP<CrsMatrixWrap> crop2 = Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(crmat2);
1264 "A and B must be either both (compatible) BlockedCrsMatrix objects or both CrsMatrixWrap objects.");
1265
1266 // Forcibly compute the global constants if we don't have them (only works for real CrsMatrices, not nested blocks)
1267 if (!crop1.is_null())
1268 Teuchos::rcp_const_cast<CrsGraph>(crmat1->getCrsGraph())->computeGlobalConstants();
1269 if (!crop2.is_null())
1270 Teuchos::rcp_const_cast<CrsGraph>(crmat2->getCrsGraph())->computeGlobalConstants();
1271
1272 TEUCHOS_TEST_FOR_EXCEPTION(!crmat1->haveGlobalConstants(), Exceptions::RuntimeError,
1273 "crmat1 does not have global constants");
1274 TEUCHOS_TEST_FOR_EXCEPTION(!crmat2->haveGlobalConstants(), Exceptions::RuntimeError,
1275 "crmat2 does not have global constants");
1276
1277 if (crmat1->getGlobalNumEntries() == 0 || crmat2->getGlobalNumEntries() == 0)
1278 continue;
1279
1280 // temporary matrix containing result of local block multiplication
1281 RCP<Matrix> temp = Teuchos::null;
1282
1283 if (crop1 != Teuchos::null && crop2 != Teuchos::null)
1284 temp = Multiply(*crop1, false, *crop2, false, fos);
1285 else {
1286 RCP<BlockedCrsMatrix> bop1 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat1);
1287 RCP<BlockedCrsMatrix> bop2 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(crmat2);
1288 TEUCHOS_TEST_FOR_EXCEPTION(bop1.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1289 TEUCHOS_TEST_FOR_EXCEPTION(bop2.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixMultiplyBlock)");
1290 TEUCHOS_TEST_FOR_EXCEPTION(bop1->Cols() != bop2->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << bop1->Cols() << " columns and B has " << bop2->Rows() << " rows. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1291 TEUCHOS_TEST_FOR_EXCEPTION(bop1->getDomainMap()->isSameAs(*(bop2->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixMultiplyBlock)");
1292
1293 // recursive multiplication call
1294 temp = TwoMatrixMultiplyBlock(*bop1, transposeA, *bop2, transposeB, fos, doFillComplete, doOptimizeStorage);
1295
1296 RCP<BlockedCrsMatrix> btemp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(temp);
1297 TEUCHOS_TEST_FOR_EXCEPTION(btemp->Rows() != bop1->Rows(), Xpetra::Exceptions::RuntimeError, "Number of block rows of local blocked operator is " << btemp->Rows() << " but should be " << bop1->Rows() << ". (TwoMatrixMultiplyBlock)");
1298 TEUCHOS_TEST_FOR_EXCEPTION(btemp->Cols() != bop2->Cols(), Xpetra::Exceptions::RuntimeError, "Number of block cols of local blocked operator is " << btemp->Cols() << " but should be " << bop2->Cols() << ". (TwoMatrixMultiplyBlock)");
1299 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getFullMap()->isSameAs(*(bop1->getRangeMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of local blocked operator should be same as first operator. (TwoMatrixMultiplyBlock)");
1300 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getFullMap()->isSameAs(*(bop2->getDomainMapExtractor()->getFullMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of local blocked operator should be same as second operator. (TwoMatrixMultiplyBlock)");
1301 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getRangeMapExtractor()->getThyraMode() != bop1->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local range map extractor incompatible with range map extractor of A (TwoMatrixMultiplyBlock)");
1302 TEUCHOS_TEST_FOR_EXCEPTION(btemp->getDomainMapExtractor()->getThyraMode() != bop2->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Thyra mode of local domain map extractor incompatible with domain map extractor of B (TwoMatrixMultiplyBlock)");
1303 }
1304
1305 TEUCHOS_TEST_FOR_EXCEPTION(temp->isFillComplete() == false, Xpetra::Exceptions::RuntimeError, "Local block is not filled. (TwoMatrixMultiplyBlock)");
1306
1307 RCP<Matrix> addRes = null;
1308 if (Cij.is_null())
1309 Cij = temp;
1310 else {
1311 MatrixMatrix::TwoMatrixAdd(*temp, false, 1.0, *Cij, false, 1.0, addRes, fos);
1312 Cij = addRes;
1313 }
1314 }
1315
1316 if (!Cij.is_null()) {
1317 if (Cij->isFillComplete())
1318 Cij->resumeFill();
1319 Cij->fillComplete(B.getDomainMap(j), A.getRangeMap(i));
1320 C->setMatrix(i, j, Cij);
1321 } else {
1322 C->setMatrix(i, j, Teuchos::null);
1323 }
1324 }
1325 }
1326
1327 if (doFillComplete)
1328 C->fillComplete(); // call default fillComplete for BlockCrsMatrixWrap objects
1329
1330 return C;
1331 } // TwoMatrixMultiplyBlock
1332
1345 static void TwoMatrixAdd(const Matrix& A, bool transposeA, SC alpha, Matrix& B, SC beta) {
1347 "TwoMatrixAdd: matrix row maps are not the same.");
1348
1349 if (A.getRowMap()->lib() == Xpetra::UseEpetra) {
1350#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1351 const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(A);
1352 Epetra_CrsMatrix& epB = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstEpetraCrs(B);
1353
1354 // FIXME is there a bug if beta=0?
1355 int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, beta);
1356
1357 if (rv != 0)
1358 throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value " + Teuchos::toString(rv));
1359 std::ostringstream buf;
1360#else
1361 throw Exceptions::RuntimeError("Xpetra must be compiled with EpetraExt.");
1362#endif
1363 } else if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
1364#ifdef HAVE_XPETRA_TPETRA
1365#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1366 (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1367 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=long long enabled."));
1368#else
1369 const Tpetra::CrsMatrix<SC, LO, GO, NO>& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(A);
1370 Tpetra::CrsMatrix<SC, LO, GO, NO>& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstTpetraCrs(B);
1371
1372 Tpetra::MatrixMatrix::Add(tpA, transposeA, alpha, tpB, beta);
1373#endif
1374#else
1375 throw Exceptions::RuntimeError("Xpetra must be compiled with Tpetra.");
1376#endif
1377 }
1378 } // MatrixMatrix::TwoMatrixAdd()
1379
1394 static void TwoMatrixAdd(const Matrix& A, bool transposeA, const SC& alpha,
1395 const Matrix& B, bool transposeB, const SC& beta,
1396 RCP<Matrix>& C, Teuchos::FancyOStream& fos, bool AHasFixedNnzPerRow = false) {
1397 RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
1398 RCP<const Matrix> rcpB = Teuchos::rcpFromRef(B);
1399 RCP<const BlockedCrsMatrix> rcpBopA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpA);
1400 RCP<const BlockedCrsMatrix> rcpBopB = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpB);
1401
1402 if (rcpBopA == Teuchos::null && rcpBopB == Teuchos::null) {
1404 "TwoMatrixAdd: matrix row maps are not the same.");
1405 auto lib = A.getRowMap()->lib();
1406 if (lib == Xpetra::UseEpetra) {
1407#if defined(HAVE_XPETRA_EPETRA) && defined(HAVE_XPETRA_EPETRAEXT)
1408 const Epetra_CrsMatrix& epA = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(A);
1409 const Epetra_CrsMatrix& epB = Xpetra::Helpers<SC, LO, GO, NO>::Op2EpetraCrs(B);
1410 if (C.is_null()) {
1411 size_t maxNzInA = 0;
1412 size_t maxNzInB = 0;
1413 size_t numLocalRows = 0;
1414 if (A.isFillComplete() && B.isFillComplete()) {
1415 maxNzInA = A.getLocalMaxNumRowEntries();
1416 maxNzInB = B.getLocalMaxNumRowEntries();
1417 numLocalRows = A.getLocalNumRows();
1418 }
1419
1420 if (maxNzInA == 1 || maxNzInB == 1 || AHasFixedNnzPerRow) {
1421 // first check if either A or B has at most 1 nonzero per row
1422 // the case of both having at most 1 nz per row is handled by the ``else''
1423 Teuchos::ArrayRCP<size_t> exactNnzPerRow(numLocalRows);
1424
1425 if ((maxNzInA == 1 && maxNzInB > 1) || AHasFixedNnzPerRow) {
1426 for (size_t i = 0; i < numLocalRows; ++i)
1427 exactNnzPerRow[i] = B.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInA;
1428
1429 } else {
1430 for (size_t i = 0; i < numLocalRows; ++i)
1431 exactNnzPerRow[i] = A.getNumEntriesInLocalRow(Teuchos::as<LO>(i)) + maxNzInB;
1432 }
1433
1434 fos << "MatrixMatrix::TwoMatrixAdd : special case detected (one matrix has a fixed nnz per row)"
1435 << ", using static profiling" << std::endl;
1436 C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), exactNnzPerRow));
1437
1438 } else {
1439 // general case
1440 LO maxPossibleNNZ = A.getLocalMaxNumRowEntries() + B.getLocalMaxNumRowEntries();
1441 C = rcp(new Xpetra::CrsMatrixWrap<SC, LO, GO, NO>(A.getRowMap(), maxPossibleNNZ));
1442 }
1443 if (transposeB)
1444 fos << "MatrixMatrix::TwoMatrixAdd : ** WARNING ** estimate could be badly wrong because second summand is transposed" << std::endl;
1445 }
1446 RCP<Epetra_CrsMatrix> epC = Xpetra::Helpers<SC, LO, GO, NO>::Op2NonConstEpetraCrs(C);
1447 Epetra_CrsMatrix* ref2epC = &*epC; // to avoid a compiler error...
1448
1449 // FIXME is there a bug if beta=0?
1450 int rv = EpetraExt::MatrixMatrix::Add(epA, transposeA, alpha, epB, transposeB, beta, ref2epC);
1451
1452 if (rv != 0)
1453 throw Exceptions::RuntimeError("EpetraExt::MatrixMatrix::Add return value of " + Teuchos::toString(rv));
1454#else
1455 throw Exceptions::RuntimeError("MueLu must be compile with EpetraExt.");
1456#endif
1457 } else if (lib == Xpetra::UseTpetra) {
1458#ifdef HAVE_XPETRA_TPETRA
1459#if ((defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_OPENMP) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))) || \
1460 (!defined(EPETRA_HAVE_OMP) && (!defined(HAVE_TPETRA_INST_SERIAL) || !defined(HAVE_TPETRA_INST_INT_LONG_LONG))))
1461 throw(Xpetra::Exceptions::RuntimeError("Xpetra must be compiled with Tpetra GO=long long enabled."));
1462#else
1463 using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1464 using tcrs_matrix_type = Tpetra::CrsMatrix<SC, LO, GO, NO>;
1465 const tcrs_matrix_type& tpA = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(A);
1466 const tcrs_matrix_type& tpB = Xpetra::Helpers<SC, LO, GO, NO>::Op2TpetraCrs(B);
1467 C = helpers::tpetraAdd(tpA, transposeA, alpha, tpB, transposeB, beta);
1468#endif
1469#else
1470 throw Exceptions::RuntimeError("Xpetra must be compile with Tpetra.");
1471#endif
1472 }
1473
1475 if (A.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(A));
1476 if (B.IsView("stridedMaps")) C->CreateView("stridedMaps", rcpFromRef(B));
1478 }
1479 // the first matrix is of type CrsMatrixWrap, the second is a blocked operator
1480 else if (rcpBopA == Teuchos::null && rcpBopB != Teuchos::null) {
1481 RCP<const MapExtractor> rgmapextractor = rcpBopB->getRangeMapExtractor();
1482 RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
1483
1484 C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1485 RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1486
1487 size_t i = 0;
1488 for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
1489 RCP<Matrix> Cij = Teuchos::null;
1490 if (rcpA != Teuchos::null &&
1491 rcpBopB->getMatrix(i, j) != Teuchos::null) {
1492 // recursive call
1493 TwoMatrixAdd(*rcpA, transposeA, alpha,
1494 *(rcpBopB->getMatrix(i, j)), transposeB, beta,
1495 Cij, fos, AHasFixedNnzPerRow);
1496 } else if (rcpA == Teuchos::null &&
1497 rcpBopB->getMatrix(i, j) != Teuchos::null) {
1498 Cij = rcpBopB->getMatrix(i, j);
1499 } else if (rcpA != Teuchos::null &&
1500 rcpBopB->getMatrix(i, j) == Teuchos::null) {
1501 Cij = Teuchos::rcp_const_cast<Matrix>(rcpA);
1502 } else {
1503 Cij = Teuchos::null;
1504 }
1505
1506 if (!Cij.is_null()) {
1507 if (Cij->isFillComplete())
1508 Cij->resumeFill();
1509 Cij->fillComplete();
1510 bC->setMatrix(i, j, Cij);
1511 } else {
1512 bC->setMatrix(i, j, Teuchos::null);
1513 }
1514 } // loop over columns j
1515 }
1516 // the second matrix is of type CrsMatrixWrap, the first is a blocked operator
1517 else if (rcpBopA != Teuchos::null && rcpBopB == Teuchos::null) {
1518 RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
1519 RCP<const MapExtractor> domapextractor = rcpBopA->getDomainMapExtractor();
1520
1521 C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1522 RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1523
1524 size_t j = 0;
1525 for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
1526 RCP<Matrix> Cij = Teuchos::null;
1527 if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1528 rcpB != Teuchos::null) {
1529 // recursive call
1530 TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
1531 *rcpB, transposeB, beta,
1532 Cij, fos, AHasFixedNnzPerRow);
1533 } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
1534 rcpB != Teuchos::null) {
1535 Cij = Teuchos::rcp_const_cast<Matrix>(rcpB);
1536 } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1537 rcpB == Teuchos::null) {
1538 Cij = rcpBopA->getMatrix(i, j);
1539 } else {
1540 Cij = Teuchos::null;
1541 }
1542
1543 if (!Cij.is_null()) {
1544 if (Cij->isFillComplete())
1545 Cij->resumeFill();
1546 Cij->fillComplete();
1547 bC->setMatrix(i, j, Cij);
1548 } else {
1549 bC->setMatrix(i, j, Teuchos::null);
1550 }
1551 } // loop over rows i
1552 } else {
1553 // This is the version for blocked matrices
1554
1555 // check the compatibility of the blocked operators
1556 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA.is_null() == true, Xpetra::Exceptions::BadCast, "A is not a BlockedCrsMatrix. (TwoMatrixAdd)");
1557 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopB.is_null() == true, Xpetra::Exceptions::BadCast, "B is not a BlockedCrsMatrix. (TwoMatrixAdd)");
1558 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Rows() << " rows and B has " << rcpBopA->Rows() << " rows. Matrices are not compatible! (TwoMatrixAdd)");
1559 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->Rows() != rcpBopB->Rows(), Xpetra::Exceptions::RuntimeError, "A has " << rcpBopA->Cols() << " cols and B has " << rcpBopA->Cols() << " cols. Matrices are not compatible! (TwoMatrixAdd)");
1560 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMap()->isSameAs(*(rcpBopB->getRangeMap())) == false, Xpetra::Exceptions::RuntimeError, "Range map of A is not the same as range map of B. Matrices are not compatible! (TwoMatrixAdd)");
1561 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMap()->isSameAs(*(rcpBopB->getDomainMap())) == false, Xpetra::Exceptions::RuntimeError, "Domain map of A is not the same as domain map of B. Matrices are not compatible! (TwoMatrixAdd)");
1562 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getRangeMapExtractor()->getThyraMode() != rcpBopB->getRangeMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in RangeMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
1563 TEUCHOS_TEST_FOR_EXCEPTION(rcpBopA->getDomainMapExtractor()->getThyraMode() != rcpBopB->getDomainMapExtractor()->getThyraMode(), Xpetra::Exceptions::RuntimeError, "Different Thyra/Xpetra style gids in DomainMapExtractor of A and B. Matrices are not compatible! (TwoMatrixAdd)");
1564
1565 RCP<const MapExtractor> rgmapextractor = rcpBopA->getRangeMapExtractor();
1566 RCP<const MapExtractor> domapextractor = rcpBopB->getDomainMapExtractor();
1567
1568 C = rcp(new BlockedCrsMatrix(rgmapextractor, domapextractor, 33 /* TODO fix me */));
1569 RCP<BlockedCrsMatrix> bC = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(C);
1570
1571 for (size_t i = 0; i < rcpBopA->Rows(); ++i) { // loop over all block rows of A
1572 for (size_t j = 0; j < rcpBopB->Cols(); ++j) { // loop over all block columns of B
1573
1574 RCP<Matrix> Cij = Teuchos::null;
1575 if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1576 rcpBopB->getMatrix(i, j) != Teuchos::null) {
1577 // recursive call
1578
1579 TwoMatrixAdd(*(rcpBopA->getMatrix(i, j)), transposeA, alpha,
1580 *(rcpBopB->getMatrix(i, j)), transposeB, beta,
1581 Cij, fos, AHasFixedNnzPerRow);
1582 } else if (rcpBopA->getMatrix(i, j) == Teuchos::null &&
1583 rcpBopB->getMatrix(i, j) != Teuchos::null) {
1584 Cij = rcpBopB->getMatrix(i, j);
1585 } else if (rcpBopA->getMatrix(i, j) != Teuchos::null &&
1586 rcpBopB->getMatrix(i, j) == Teuchos::null) {
1587 Cij = rcpBopA->getMatrix(i, j);
1588 } else {
1589 Cij = Teuchos::null;
1590 }
1591
1592 if (!Cij.is_null()) {
1593 if (Cij->isFillComplete())
1594 Cij->resumeFill();
1595 // Cij->fillComplete(rcpBopA->getDomainMap(j), rcpBopA->getRangeMap(i));
1596 Cij->fillComplete();
1597 bC->setMatrix(i, j, Cij);
1598 } else {
1599 bC->setMatrix(i, j, Teuchos::null);
1600 }
1601 } // loop over columns j
1602 } // loop over rows i
1603
1604 } // end blocked recursive algorithm
1605 } // MatrixMatrix::TwoMatrixAdd()
1606}; // end specialization on GO=long long and NO=EpetraNode
1607
1608#endif // HAVE_XPETRA_EPETRA
1609
1610} // end namespace Xpetra
1611
1612#define XPETRA_MATRIXMATRIX_SHORT
1613
1614#endif /* PACKAGES_XPETRA_SUP_UTILS_XPETRA_MATRIXMATRIX_DECL_HPP_ */
Copy
int GID(int LID) const
int IndexBase() const
int NumMyElements() const
bool Filled() const
const Epetra_Map & RowMap() const
const Epetra_Comm & Comm() const
const Epetra_Map & RangeMap() const
const Epetra_Map & ColMap() const
const Epetra_Map & DomainMap() const
MPI_Comm GetMpiComm() const
bool is_null() const
static RCP< Time > getNewTimer(const std::string &name)
virtual size_t Cols() const
number of column blocks
const RCP< const Map > getRangeMap() const
Returns the Map associated with the range of this operator.
Teuchos::RCP< Matrix > getMatrix(size_t r, size_t c) const
return block (r,c)
bool isFillComplete() const
Returns true if fillComplete() has been called and the matrix is in compute mode.
const RCP< const Map > getDomainMap() const
Returns the Map associated with the domain of this operator.
virtual size_t Rows() const
number of row blocks
RCP< const MapExtractor > getRangeMapExtractor() const
Returns map extractor class for range map.
RCP< const MapExtractor > getDomainMapExtractor() const
Returns map extractor for domain map.
Concrete implementation of Xpetra::Matrix.
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, const SC &alpha, const Matrix &B, bool transposeB, const SC &beta, RCP< Matrix > &C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow=false)
Helper function to calculate C = alpha*A + beta*B.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply "in-place".
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool callFillCompleteOnResult=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &epA, const Epetra_CrsMatrix &epB, Teuchos::FancyOStream &fos)
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool callFillCompleteOnResult=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, SC alpha, Matrix &B, SC beta)
Helper function to calculate B = alpha*A + beta*B.
static RCP< Matrix > Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, RCP< Matrix > C_in, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
Helper function to do matrix-matrix multiply.
static void TwoMatrixAdd(const Matrix &A, bool transposeA, const SC &alpha, const Matrix &B, bool transposeB, const SC &beta, RCP< Matrix > &C, Teuchos::FancyOStream &fos, bool AHasFixedNnzPerRow=false)
Helper function to calculate C = alpha*A + beta*B.
static RCP< Epetra_CrsMatrix > MLTwoMatrixMultiply(const Epetra_CrsMatrix &, const Epetra_CrsMatrix &, Teuchos::FancyOStream &)
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
static RCP< BlockedCrsMatrix > TwoMatrixMultiplyBlock(const BlockedCrsMatrix &A, bool transposeA, const BlockedCrsMatrix &B, bool transposeB, Teuchos::FancyOStream &fos, bool doFillComplete=true, bool doOptimizeStorage=true)
Helper function to do matrix-matrix multiply "in-place".
Xpetra-specific matrix class.
virtual global_size_t getGlobalNumEntries() const =0
Returns the global number of entries in this matrix.
void CreateView(viewLabel_t viewLabel, const RCP< const Map > &rowMap, const RCP< const Map > &colMap)
virtual global_size_t getGlobalNumRows() const =0
Returns the number of global rows in this matrix.
virtual const RCP< const Map > & getRowMap() const
Returns the Map that describes the row distribution in this matrix.
virtual bool isFillComplete() const =0
Returns true if fillComplete() has been called and the matrix is in compute mode.
virtual size_t getLocalNumRows() const =0
Returns the number of matrix rows owned on the calling node.
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
Signal that data entry is complete, specifying domain and range maps.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
Returns the current number of entries on this node in the specified local row.
bool IsView(const viewLabel_t viewLabel) const
virtual size_t getLocalMaxNumRowEntries() const =0
Returns the maximum number of entries across all rows/columns on this node.
virtual LocalOrdinal GetStorageBlockSize() const =0
Returns the block size of the storage mechanism, which is usually 1, except for Tpetra::BlockCrsMatri...
virtual const Teuchos::RCP< const map_type > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X....
virtual const Teuchos::RCP< const map_type > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y....
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
TypeTo as(const TypeFrom &t)
#define TEUCHOS_UNREACHABLE_RETURN(dummyReturnVal)
std::string toString(const T &t)
basic_FancyOStream< char > FancyOStream
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Tpetra::KokkosCompat::KokkosSerialWrapperNode EpetraNode
RCP< Xpetra::CrsMatrixWrap< SC, LO, GO, NO > > Convert_Epetra_CrsMatrix_ToXpetra_CrsMatrixWrap(RCP< Epetra_CrsMatrix > &)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)