Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Relaxation_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_RELAXATION_DEF_HPP
11#define IFPACK2_RELAXATION_DEF_HPP
12
13#include "Teuchos_StandardParameterEntryValidators.hpp"
14#include "Teuchos_TimeMonitor.hpp"
15#include "Tpetra_CrsMatrix.hpp"
16#include "Tpetra_BlockCrsMatrix.hpp"
17#include "Tpetra_BlockView.hpp"
18#include "Ifpack2_Utilities.hpp"
19#include "Ifpack2_Details_getCrsMatrix.hpp"
20#include "MatrixMarket_Tpetra.hpp"
21#include "Tpetra_Details_residual.hpp"
22#include <cstdlib>
23#include <memory>
24#include <sstream>
25#include "KokkosSparse_gauss_seidel.hpp"
26
27namespace {
28 // Validate that a given int is nonnegative.
29 class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
30 public:
31 // Constructor (does nothing).
32 NonnegativeIntValidator () {}
33
34 // ParameterEntryValidator wants this method.
35 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
36 return Teuchos::null;
37 }
38
39 // Actually validate the parameter's value.
40 void
41 validate (const Teuchos::ParameterEntry& entry,
42 const std::string& paramName,
43 const std::string& sublistName) const
44 {
45 using std::endl;
46 Teuchos::any anyVal = entry.getAny (true);
47 const std::string entryName = entry.getAny (false).typeName ();
48
49 TEUCHOS_TEST_FOR_EXCEPTION(
50 anyVal.type () != typeid (int),
51 Teuchos::Exceptions::InvalidParameterType,
52 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
53 << "\" has the wrong type." << endl << "Parameter: " << paramName
54 << endl << "Type specified: " << entryName << endl
55 << "Type required: int" << endl);
56
57 const int val = Teuchos::any_cast<int> (anyVal);
58 TEUCHOS_TEST_FOR_EXCEPTION(
59 val < 0, Teuchos::Exceptions::InvalidParameterValue,
60 "Parameter \"" << paramName << "\" in sublist \"" << sublistName
61 << "\" is negative." << endl << "Parameter: " << paramName
62 << endl << "Value specified: " << val << endl
63 << "Required range: [0, INT_MAX]" << endl);
64 }
65
66 // ParameterEntryValidator wants this method.
67 const std::string getXMLTypeName () const {
68 return "NonnegativeIntValidator";
69 }
70
71 // ParameterEntryValidator wants this method.
72 void
73 printDoc (const std::string& docString,
74 std::ostream &out) const
75 {
76 Teuchos::StrUtils::printLines (out, "# ", docString);
77 out << "#\tValidator Used: " << std::endl;
78 out << "#\t\tNonnegativeIntValidator" << std::endl;
79 }
80 };
81
82 // A way to get a small positive number (eps() for floating-point
83 // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
84 // define it (for example, for integer values).
85 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
86 class SmallTraits {
87 public:
88 // Return eps if Scalar is a floating-point type, else return 1.
89 static const Scalar eps ();
90 };
91
92 // Partial specialization for when Scalar is not a floating-point type.
93 template<class Scalar>
94 class SmallTraits<Scalar, true> {
95 public:
96 static const Scalar eps () {
97 return Teuchos::ScalarTraits<Scalar>::one ();
98 }
99 };
100
101 // Partial specialization for when Scalar is a floating-point type.
102 template<class Scalar>
103 class SmallTraits<Scalar, false> {
104 public:
105 static const Scalar eps () {
106 return Teuchos::ScalarTraits<Scalar>::eps ();
107 }
108 };
109
110 // Work-around for GitHub Issue #5269.
111 template<class Scalar,
112 const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
113 struct RealTraits {};
114
115 template<class Scalar>
116 struct RealTraits<Scalar, false> {
117 using val_type = Scalar;
118 using mag_type = Scalar;
119 static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
120 return z;
121 }
122 };
123
124 template<class Scalar>
125 struct RealTraits<Scalar, true> {
126 using val_type = Scalar;
127 using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
128 static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
129 return Kokkos::ArithTraits<val_type>::real (z);
130 }
131 };
132
133 template<class Scalar>
134 KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
135 getRealValue (const Scalar& z) {
136 return RealTraits<Scalar>::real (z);
137 }
138
139} // namespace (anonymous)
140
141namespace Ifpack2 {
142
143template<class MatrixType>
144void
146updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
147 size_t numVecs) const
148{
149 // Allocate a multivector if the cached one isn't perfect. Checking
150 // for map pointer equality is much cheaper than Map::isSameAs.
151 if (cachedMV_.is_null () ||
152 map.get () != cachedMV_->getMap ().get () ||
153 cachedMV_->getNumVectors () != numVecs) {
154 using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
156 cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
157 }
158}
159
160template<class MatrixType>
162setMatrix(const Teuchos::RCP<const row_matrix_type>& A)
163{
164 if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
165 Importer_ = Teuchos::null;
166 pointImporter_ = Teuchos::null;
167 Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
168 isInitialized_ = false;
169 IsComputed_ = false;
170 diagOffsets_ = Kokkos::View<size_t*, device_type>();
171 savedDiagOffsets_ = false;
172 hasBlockCrsMatrix_ = false;
173 serialGaussSeidel_ = Teuchos::null;
174 if (! A.is_null ()) {
175 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
176 }
177 A_ = A;
178 }
179}
180
181template<class MatrixType>
183Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
184: A_ (A),
185 IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
186 false : // a reasonable default if there's no communicator
187 A->getRowMap ()->getComm ()->getSize () > 1)
188{
189 this->setObjectLabel ("Ifpack2::Relaxation");
190}
191
192
193template<class MatrixType>
194Teuchos::RCP<const Teuchos::ParameterList>
196{
197 using Teuchos::Array;
198 using Teuchos::ParameterList;
199 using Teuchos::parameterList;
200 using Teuchos::RCP;
201 using Teuchos::rcp;
202 using Teuchos::rcp_const_cast;
203 using Teuchos::rcp_implicit_cast;
204 using Teuchos::setStringToIntegralParameter;
205 typedef Teuchos::ParameterEntryValidator PEV;
206
207 if (validParams_.is_null ()) {
208 RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
209
210 // Set a validator that automatically converts from the valid
211 // string options to their enum values.
212 Array<std::string> precTypes (8);
213 precTypes[0] = "Jacobi";
214 precTypes[1] = "Gauss-Seidel";
215 precTypes[2] = "Symmetric Gauss-Seidel";
216 precTypes[3] = "MT Gauss-Seidel";
217 precTypes[4] = "MT Symmetric Gauss-Seidel";
218 precTypes[5] = "Richardson";
219 precTypes[6] = "Two-stage Gauss-Seidel";
220 precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
221 Array<Details::RelaxationType> precTypeEnums (8);
222 precTypeEnums[0] = Details::JACOBI;
223 precTypeEnums[1] = Details::GS;
224 precTypeEnums[2] = Details::SGS;
225 precTypeEnums[3] = Details::MTGS;
226 precTypeEnums[4] = Details::MTSGS;
227 precTypeEnums[5] = Details::RICHARDSON;
228 precTypeEnums[6] = Details::GS2;
229 precTypeEnums[7] = Details::SGS2;
230 const std::string defaultPrecType ("Jacobi");
231 setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
232 defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
233 pl.getRawPtr ());
234
235 const int numSweeps = 1;
236 RCP<PEV> numSweepsValidator =
237 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
238 pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
239 rcp_const_cast<const PEV> (numSweepsValidator));
240
241 // number of 'local' outer sweeps for two-stage GS
242 const int numOuterSweeps = 1;
243 RCP<PEV> numOuterSweepsValidator =
244 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
245 pl->set ("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
246 rcp_const_cast<const PEV> (numOuterSweepsValidator));
247 // number of 'local' inner sweeps for two-stage GS
248 const int numInnerSweeps = 1;
249 RCP<PEV> numInnerSweepsValidator =
250 rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
251 pl->set ("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
252 rcp_const_cast<const PEV> (numInnerSweepsValidator));
253 // specify damping factor for the inner sweeps of two-stage GS
254 const scalar_type innerDampingFactor = STS::one ();
255 pl->set ("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
256 // specify whether to use sptrsv instead of inner-iterations for two-stage GS
257 const bool innerSpTrsv = false;
258 pl->set ("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
259 // specify whether to use compact form of recurrence for two-stage GS
260 const bool compactForm = false;
261 pl->set ("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
262
263 const scalar_type dampingFactor = STS::one ();
264 pl->set ("relaxation: damping factor", dampingFactor);
265
266 const bool zeroStartingSolution = true;
267 pl->set ("relaxation: zero starting solution", zeroStartingSolution);
268
269 const bool doBackwardGS = false;
270 pl->set ("relaxation: backward mode", doBackwardGS);
271
272 const bool doL1Method = false;
273 pl->set ("relaxation: use l1", doL1Method);
274
275 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
276 (STM::one() + STM::one()); // 1.5
277 pl->set ("relaxation: l1 eta", l1eta);
278
279 const scalar_type minDiagonalValue = STS::zero ();
280 pl->set ("relaxation: min diagonal value", minDiagonalValue);
281
282 const bool fixTinyDiagEntries = false;
283 pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
284
285 const bool checkDiagEntries = false;
286 pl->set ("relaxation: check diagonal entries", checkDiagEntries);
287
288 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
289 pl->set("relaxation: local smoothing indices", localSmoothingIndices);
290
291 const bool is_matrix_structurally_symmetric = false;
292 pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
293
294 const bool ifpack2_dump_matrix = false;
295 pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
296
297 const int cluster_size = 1;
298 pl->set("relaxation: mtgs cluster size", cluster_size);
299
300 pl->set("relaxation: mtgs coloring algorithm", "Default");
301
302 const int long_row_threshold = 0;
303 pl->set("relaxation: long row threshold", long_row_threshold);
304
305 const bool timer_for_apply = true;
306 pl->set("timer for apply", timer_for_apply);
307
308 validParams_ = rcp_const_cast<const ParameterList> (pl);
309 }
310 return validParams_;
311}
312
313
314template<class MatrixType>
315void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
316{
317 using Teuchos::getIntegralValue;
318 using Teuchos::getStringValue;
319 using Teuchos::ParameterList;
320 using Teuchos::RCP;
321 typedef scalar_type ST; // just to make code below shorter
322
323 if (pl.isType<double>("relaxation: damping factor")) {
324 // Make sure that ST=complex can run with a damping factor that is
325 // a double.
326 ST df = pl.get<double>("relaxation: damping factor");
327 pl.remove("relaxation: damping factor");
328 pl.set("relaxation: damping factor",df);
329 }
330
331 pl.validateParametersAndSetDefaults (* getValidParameters ());
332
333 const Details::RelaxationType precType =
334 getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
335 const std::string precTypeStr = getStringValue<Details::RelaxationType>(pl, "relaxation: type");
336 // We only access "relaxation: type" using strings in the rest of the code
337 pl.set<std::string>("relaxation: type", precTypeStr);
338 pl.get<std::string>("relaxation: type"); // We need to mark the parameter as "used"
339 const int numSweeps = pl.get<int> ("relaxation: sweeps");
340 const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
341 const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
342 const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
343 const bool doL1Method = pl.get<bool> ("relaxation: use l1");
344 const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
345 const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
346 const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
347 const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
348 const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
349 const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
350 const bool timer_for_apply = pl.get<bool> ("timer for apply");
351 int cluster_size = 1;
352 if(pl.isParameter ("relaxation: mtgs cluster size")) //optional parameter
353 cluster_size = pl.get<int> ("relaxation: mtgs cluster size");
354 int long_row_threshold = 0;
355 if(pl.isParameter ("relaxation: long row threshold")) //optional parameter
356 long_row_threshold = pl.get<int> ("relaxation: long row threshold");
357 std::string color_algo_name = pl.get<std::string>("relaxation: mtgs coloring algorithm");
358 //convert to lowercase
359 for(char& c : color_algo_name)
360 c = tolower(c);
361 if(color_algo_name == "default")
362 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_DEFAULT;
363 else if(color_algo_name == "serial")
364 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_SERIAL;
365 else if(color_algo_name == "vb")
366 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VB;
367 else if(color_algo_name == "vbbit")
368 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBBIT;
369 else if(color_algo_name == "vbcs")
370 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBCS;
371 else if(color_algo_name == "vbd")
372 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBD;
373 else if(color_algo_name == "vbdbit")
374 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_VBDBIT;
375 else if(color_algo_name == "eb")
376 this->mtColoringAlgorithm_ = KokkosGraph::COLORING_EB;
377 else
378 {
379 std::ostringstream msg;
380 msg << "Ifpack2::Relaxation: 'relaxation: mtgs coloring algorithm' = '" << color_algo_name << "' is not valid.\n";
381 msg << "Choices (not case sensitive) are: Default, Serial, VB, VBBIT, VBCS, VBD, VBDBIT, EB.";
382 TEUCHOS_TEST_FOR_EXCEPTION(
383 true, std::invalid_argument, msg.str());
384 }
385
386 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
387
388 // for Two-stage Gauss-Seidel
389 if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
390 // Make sure that ST=complex can run with a damping factor that is
391 // a double.
392 ST df = pl.get<double>("relaxation: inner damping factor");
393 pl.remove("relaxation: inner damping factor");
394 pl.set("relaxation: inner damping factor",df);
395 }
396 //If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
397 if (long_row_threshold > 0) {
398 TEUCHOS_TEST_FOR_EXCEPTION(
399 cluster_size != 1, std::invalid_argument, "Ifpack2::Relaxation: "
400 "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
401 TEUCHOS_TEST_FOR_EXCEPTION(
402 precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
403 std::invalid_argument, "Ifpack2::Relaxation: "
404 "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
405 "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
406 }
407
408 const ST innerDampingFactor = pl.get<ST> ("relaxation: inner damping factor");
409 const int numInnerSweeps = pl.get<int> ("relaxation: inner sweeps");
410 const int numOuterSweeps = pl.get<int> ("relaxation: outer sweeps");
411 const bool innerSpTrsv = pl.get<bool> ("relaxation: inner sparse-triangular solve");
412 const bool compactForm = pl.get<bool> ("relaxation: compact form");
413
414 // "Commit" the changes, now that we've validated everything.
415 PrecType_ = precType;
416 NumSweeps_ = numSweeps;
417 DampingFactor_ = dampingFactor;
418 ZeroStartingSolution_ = zeroStartSol;
419 DoBackwardGS_ = doBackwardGS;
420 DoL1Method_ = doL1Method;
421 L1Eta_ = l1Eta;
422 MinDiagonalValue_ = minDiagonalValue;
423 fixTinyDiagEntries_ = fixTinyDiagEntries;
424 checkDiagEntries_ = checkDiagEntries;
425 clusterSize_ = cluster_size;
426 longRowThreshold_ = long_row_threshold;
427 is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
428 ifpack2_dump_matrix_ = ifpack2_dump_matrix;
429 localSmoothingIndices_ = localSmoothingIndices;
430 // for Two-stage GS
431 NumInnerSweeps_ = numInnerSweeps;
432 NumOuterSweeps_ = numOuterSweeps;
433 InnerSpTrsv_ = innerSpTrsv;
434 InnerDampingFactor_ = innerDampingFactor;
435 CompactForm_ = compactForm;
436 TimerForApply_ = timer_for_apply;
437}
438
439
440template<class MatrixType>
441void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
442{
443 // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
444 // but otherwise, we will get [unused] in pl
445 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
446}
447
448
449template<class MatrixType>
450Teuchos::RCP<const Teuchos::Comm<int> >
452 TEUCHOS_TEST_FOR_EXCEPTION(
453 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
454 "The input matrix A is null. Please call setMatrix() with a nonnull "
455 "input matrix before calling this method.");
456 return A_->getRowMap ()->getComm ();
457}
458
459
460template<class MatrixType>
461Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
463 return A_;
464}
465
466
467template<class MatrixType>
468Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
469 typename MatrixType::global_ordinal_type,
470 typename MatrixType::node_type> >
472 TEUCHOS_TEST_FOR_EXCEPTION(
473 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
474 "The input matrix A is null. Please call setMatrix() with a nonnull "
475 "input matrix before calling this method.");
476 return A_->getDomainMap ();
477}
478
479
480template<class MatrixType>
481Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
482 typename MatrixType::global_ordinal_type,
483 typename MatrixType::node_type> >
485 TEUCHOS_TEST_FOR_EXCEPTION(
486 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
487 "The input matrix A is null. Please call setMatrix() with a nonnull "
488 "input matrix before calling this method.");
489 return A_->getRangeMap ();
490}
491
492
493template<class MatrixType>
495 return true;
496}
497
498
499template<class MatrixType>
501 return(NumInitialize_);
502}
503
504
505template<class MatrixType>
507 return(NumCompute_);
508}
509
510
511template<class MatrixType>
513 return(NumApply_);
514}
515
516
517template<class MatrixType>
519 return(InitializeTime_);
520}
521
522
523template<class MatrixType>
525 return(ComputeTime_);
526}
527
528
529template<class MatrixType>
531 return(ApplyTime_);
532}
533
534
535template<class MatrixType>
537 return(ComputeFlops_);
538}
539
540
541template<class MatrixType>
543 return(ApplyFlops_);
544}
545
546
547
548template<class MatrixType>
550 TEUCHOS_TEST_FOR_EXCEPTION(
551 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
552 "The input matrix A is null. Please call setMatrix() with a nonnull "
553 "input matrix, then call compute(), before calling this method.");
554 // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
555 return A_->getLocalNumRows() + A_->getLocalNumEntries();
556}
557
558
559template<class MatrixType>
560void
562apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
563 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
564 Teuchos::ETransp /* mode */,
565 scalar_type alpha,
566 scalar_type beta) const
567{
568 using Teuchos::as;
569 using Teuchos::RCP;
570 using Teuchos::rcp;
571 using Teuchos::rcpFromRef;
572 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
574 TEUCHOS_TEST_FOR_EXCEPTION(
575 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
576 "The input matrix A is null. Please call setMatrix() with a nonnull "
577 "input matrix, then call compute(), before calling this method.");
578 TEUCHOS_TEST_FOR_EXCEPTION(
579 ! isComputed (),
580 std::runtime_error,
581 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
582 "preconditioner instance before you may call apply(). You may call "
583 "isComputed() to find out if compute() has been called already.");
584 TEUCHOS_TEST_FOR_EXCEPTION(
585 X.getNumVectors() != Y.getNumVectors(),
586 std::runtime_error,
587 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
588 "X has " << X.getNumVectors() << " columns, but Y has "
589 << Y.getNumVectors() << " columns.");
590 TEUCHOS_TEST_FOR_EXCEPTION(
591 beta != STS::zero (), std::logic_error,
592 "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
593 "implemented.");
594
595 Teuchos::RCP<Teuchos::Time> timer;
596 const std::string timerName ("Ifpack2::Relaxation::apply");
597 if (TimerForApply_) {
598 timer = Teuchos::TimeMonitor::lookupCounter (timerName);
599 if (timer.is_null ()) {
600 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
601 }
602 }
603
604 Teuchos::Time time = Teuchos::Time(timerName);
605 double startTime = time.wallTime();
606
607 {
608 Teuchos::RCP<Teuchos::TimeMonitor> timeMon;
609 if (TimerForApply_)
610 timeMon = Teuchos::rcp(new Teuchos::TimeMonitor(*timer));
611
612 // Special case: alpha == 0.
613 if (alpha == STS::zero ()) {
614 // No floating-point operations, so no need to update a count.
615 Y.putScalar (STS::zero ());
616 }
617 else {
618 // If X and Y alias one another, then we need to create an
619 // auxiliary vector, Xcopy (a deep copy of X).
620 RCP<const MV> Xcopy;
621 {
622 if (X.aliases(Y)) {
623 Xcopy = rcp (new MV (X, Teuchos::Copy));
624 } else {
625 Xcopy = rcpFromRef (X);
626 }
627 }
628
629 // Each of the following methods updates the flop count itself.
630 // All implementations handle zeroing out the starting solution
631 // (if necessary) themselves.
632 switch (PrecType_) {
633 case Ifpack2::Details::JACOBI:
634 ApplyInverseJacobi(*Xcopy,Y);
635 break;
636 case Ifpack2::Details::GS:
637 ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
638 break;
639 case Ifpack2::Details::SGS:
640 ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
641 break;
642 case Ifpack2::Details::MTGS:
643 case Ifpack2::Details::GS2:
644 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
645 break;
646 case Ifpack2::Details::MTSGS:
647 case Ifpack2::Details::SGS2:
648 ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
649 break;
650 case Ifpack2::Details::RICHARDSON:
651 ApplyInverseRichardson(*Xcopy,Y);
652 break;
653
654 default:
655 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
656 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
657 << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
658 }
659 if (alpha != STS::one ()) {
660 Y.scale (alpha);
661 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
662 const double numVectors = as<double> (Y.getNumVectors ());
663 ApplyFlops_ += numGlobalRows * numVectors;
664 }
665 }
666 }
667 ApplyTime_ += (time.wallTime() - startTime);
668 ++NumApply_;
669}
670
671
672template<class MatrixType>
673void
675applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
676 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
677 Teuchos::ETransp mode) const
678{
679 TEUCHOS_TEST_FOR_EXCEPTION(
680 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
681 "The input matrix A is null. Please call setMatrix() with a nonnull "
682 "input matrix, then call compute(), before calling this method.");
683 TEUCHOS_TEST_FOR_EXCEPTION(
684 ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
685 "isComputed() must be true before you may call applyMat(). "
686 "Please call compute() before calling this method.");
687 TEUCHOS_TEST_FOR_EXCEPTION(
688 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
689 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
690 << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
691 A_->apply (X, Y, mode);
692}
693
694
695template<class MatrixType>
697{
698 const char methodName[] = "Ifpack2::Relaxation::initialize";
699
700 TEUCHOS_TEST_FOR_EXCEPTION
701 (A_.is_null (), std::runtime_error, methodName << ": The "
702 "input matrix A is null. Please call setMatrix() with "
703 "a nonnull input matrix before calling this method.");
704
705 Teuchos::RCP<Teuchos::Time> timer =
706 Teuchos::TimeMonitor::getNewCounter (methodName);
707
708 double startTime = timer->wallTime();
709
710 { // Timing of initialize starts here
711 Teuchos::TimeMonitor timeMon (*timer);
712 isInitialized_ = false;
713
714 {
715 auto rowMap = A_->getRowMap ();
716 auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
717 IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
718 }
719
720 // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
721 // but in that case, the domain and column Maps are the same and
722 // we don't need to Import anyway.
723 //
724 // mfh 07 May 2019: A_->getGraph() might be an
725 // OverlappingRowGraph, which doesn't have an Import object.
726 // However, in that case, the comm should have size 1.
727 Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
728 Teuchos::null;
729
730 {
731 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
732 Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
733 hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
734 }
735
736 serialGaussSeidel_ = Teuchos::null;
737
738 if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
739 PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
740 auto crsMat = Details::getCrsMatrix(A_);
741 TEUCHOS_TEST_FOR_EXCEPTION
742 (crsMat.is_null(), std::logic_error, methodName << ": "
743 "Multithreaded Gauss-Seidel methods currently only work "
744 "when the input matrix is a Tpetra::CrsMatrix.");
745
746 // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
747 // compute, not initialize, since users may change the matrix's
748 // values at any time before calling compute.
749 if (ifpack2_dump_matrix_) {
750 static int sequence_number = 0;
751 const std::string file_name = "Ifpack2_MT_GS_" +
752 std::to_string (sequence_number++) + ".mtx";
753 using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
754 writer_type::writeSparseFile (file_name, crsMat);
755 }
756
757 this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
758 if (mtKernelHandle_->get_gs_handle () == nullptr) {
759 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
760 mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
761 else if(this->clusterSize_ == 1) {
762 mtKernelHandle_->create_gs_handle (KokkosSparse::GS_DEFAULT, this->mtColoringAlgorithm_);
763 mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
764 }
765 else
766 mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_, this->mtColoringAlgorithm_);
767 }
768 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
769 if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
770 // set parameters for two-stage GS
771 mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
772 mtKernelHandle_->set_gs_set_num_outer_sweeps (NumOuterSweeps_);
773 mtKernelHandle_->set_gs_set_inner_damp_factor (InnerDampingFactor_);
774 mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getLocalNumRows ());
775 mtKernelHandle_->set_gs_twostage_compact_form (CompactForm_);
776 }
777
778 KokkosSparse::Experimental::gauss_seidel_symbolic(
779 mtKernelHandle_.getRawPtr (),
780 A_->getLocalNumRows (),
781 A_->getLocalNumCols (),
782 kcsr.graph.row_map,
783 kcsr.graph.entries,
784 is_matrix_structurally_symmetric_);
785 }
786 } // timing of initialize stops here
787
788 InitializeTime_ += (timer->wallTime() - startTime);
789 ++NumInitialize_;
790 isInitialized_ = true;
791}
792
793namespace Impl {
794template <typename BlockDiagView>
795struct InvertDiagBlocks {
796 typedef typename BlockDiagView::size_type Size;
797
798private:
799 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
800 template <typename View>
801 using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
802 typename View::device_type, Unmanaged>;
803
804 typedef typename BlockDiagView::non_const_value_type Scalar;
805 typedef typename BlockDiagView::device_type Device;
806 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
807 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
808
809 UnmanagedView<BlockDiagView> block_diag_;
810 // TODO Use thread team and scratch memory space. In this first
811 // pass, provide workspace for each block.
812 RWrk rwrk_buf_;
813 UnmanagedView<RWrk> rwrk_;
814 IWrk iwrk_buf_;
815 UnmanagedView<IWrk> iwrk_;
816
817public:
818 InvertDiagBlocks (BlockDiagView& block_diag)
819 : block_diag_(block_diag)
820 {
821 const auto blksz = block_diag.extent(1);
822 Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
823 rwrk_ = rwrk_buf_;
824 Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
825 iwrk_ = iwrk_buf_;
826 }
827
828 KOKKOS_INLINE_FUNCTION
829 void operator() (const Size i, int& jinfo) const {
830 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
831 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
832 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
833 int info = 0;
834 Tpetra::GETF2(D_cur, ipiv, info);
835 if (info) {
836 ++jinfo;
837 return;
838 }
839 Tpetra::GETRI(D_cur, ipiv, work, info);
840 if (info) ++jinfo;
841 }
842};
843}
844
845template<class MatrixType>
846void Relaxation<MatrixType>::computeBlockCrs ()
847{
848 using Kokkos::ALL;
849 using Teuchos::Array;
850 using Teuchos::ArrayRCP;
851 using Teuchos::ArrayView;
852 using Teuchos::as;
853 using Teuchos::Comm;
854 using Teuchos::RCP;
855 using Teuchos::rcp;
856 using Teuchos::REDUCE_MAX;
857 using Teuchos::REDUCE_MIN;
858 using Teuchos::REDUCE_SUM;
859 using Teuchos::rcp_dynamic_cast;
860 using Teuchos::reduceAll;
861 typedef local_ordinal_type LO;
862
863 const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
864 Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
865 if (timer.is_null ()) {
866 timer = Teuchos::TimeMonitor::getNewCounter (timerName);
867 }
868 double startTime = timer->wallTime();
869 {
870 Teuchos::TimeMonitor timeMon (*timer);
871
872 TEUCHOS_TEST_FOR_EXCEPTION(
873 A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
874 "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
875 "with a nonnull input matrix, then call initialize(), before calling "
876 "this method.");
877 auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type> (A_);
878 TEUCHOS_TEST_FOR_EXCEPTION(
879 blockCrsA.is_null(), std::logic_error, "Ifpack2::Relaxation::"
880 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
881 "got this far. Please report this bug to the Ifpack2 developers.");
882
883 const scalar_type one = STS::one ();
884
885 // Reset state.
886 IsComputed_ = false;
887
888 const LO lclNumMeshRows =
889 blockCrsA->getCrsGraph ().getLocalNumRows ();
890 const LO blockSize = blockCrsA->getBlockSize ();
891
892 if (! savedDiagOffsets_) {
893 blockDiag_ = block_diag_type (); // clear it before reallocating
894 blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
895 lclNumMeshRows, blockSize, blockSize);
896 if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
897 // Clear diagOffsets_ first (by assigning an empty View to it)
898 // to save memory, before reallocating.
899 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
900 diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
901 }
902 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
903 TEUCHOS_TEST_FOR_EXCEPTION
904 (static_cast<size_t> (diagOffsets_.extent (0)) !=
905 static_cast<size_t> (blockDiag_.extent (0)),
906 std::logic_error, "diagOffsets_.extent(0) = " <<
907 diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
908 << blockDiag_.extent (0) <<
909 ". Please report this bug to the Ifpack2 developers.");
910 savedDiagOffsets_ = true;
911 }
912 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
913
914 // Use an unmanaged View in this method, so that when we take
915 // subviews of it (to get each diagonal block), we don't have to
916 // touch the reference count. Reference count updates are a
917 // thread scalability bottleneck and have a performance cost even
918 // without using threads.
919 unmanaged_block_diag_type blockDiag = blockDiag_;
920
921 if (DoL1Method_ && IsParallel_) {
922 const scalar_type two = one + one;
923 const size_t maxLength = A_->getLocalMaxNumRowEntries ();
924 nonconst_local_inds_host_view_type indices ("indices",maxLength);
925 nonconst_values_host_view_type values_ ("values",maxLength * blockSize * blockSize);
926 size_t numEntries = 0;
927
928 for (LO i = 0; i < lclNumMeshRows; ++i) {
929 // FIXME (mfh 16 Dec 2015) Get views instead of copies.
930 blockCrsA->getLocalRowCopy (i, indices, values_, numEntries);
931 scalar_type * values = reinterpret_cast<scalar_type*>(values_.data());
932
933 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
934 for (LO subRow = 0; subRow < blockSize; ++subRow) {
935 magnitude_type diagonal_boost = STM::zero ();
936 for (size_t k = 0 ; k < numEntries ; ++k) {
937 if (indices[k] >= lclNumMeshRows) {
938 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
939 for (LO subCol = 0; subCol < blockSize; ++subCol) {
940 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
941 }
942 }
943 }
944 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
945 diagBlock(subRow, subRow) += diagonal_boost;
946 }
947 }
948 }
949 }
950
951 int info = 0;
952 {
953 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
954 typedef typename unmanaged_block_diag_type::execution_space exec_space;
955 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
956
957 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
958 // FIXME (mfh 19 May 2017) Different processes may not
959 // necessarily have this error all at the same time, so it would
960 // be better just to preserve a local error state and let users
961 // check.
962 TEUCHOS_TEST_FOR_EXCEPTION
963 (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
964 << " diagonal blocks.");
965 }
966
967 // In a debug build, do an extra test to make sure that all the
968 // factorizations were computed correctly.
969#ifdef HAVE_IFPACK2_DEBUG
970 const int numResults = 2;
971 // Use "max = -min" trick to get min and max in a single all-reduce.
972 int lclResults[2], gblResults[2];
973 lclResults[0] = info;
974 lclResults[1] = -info;
975 gblResults[0] = 0;
976 gblResults[1] = 0;
977 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
978 numResults, lclResults, gblResults);
979 TEUCHOS_TEST_FOR_EXCEPTION
980 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
981 "Ifpack2::Relaxation::compute: When processing the input "
982 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
983 "failed on one or more (MPI) processes.");
984#endif // HAVE_IFPACK2_DEBUG
985 serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
986 } // end TimeMonitor scope
987
988 ComputeTime_ += (timer->wallTime() - startTime);
989 ++NumCompute_;
990 IsComputed_ = true;
991}
992
993template<class MatrixType>
995{
996 using Teuchos::Array;
997 using Teuchos::ArrayRCP;
998 using Teuchos::ArrayView;
999 using Teuchos::as;
1000 using Teuchos::Comm;
1001 using Teuchos::RCP;
1002 using Teuchos::rcp;
1003 using Teuchos::REDUCE_MAX;
1004 using Teuchos::REDUCE_MIN;
1005 using Teuchos::REDUCE_SUM;
1006 using Teuchos::reduceAll;
1007 using LO = local_ordinal_type;
1008 using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
1010 using IST = typename vector_type::impl_scalar_type;
1011 using KAT = Kokkos::ArithTraits<IST>;
1012
1013 const char methodName[] = "Ifpack2::Relaxation::compute";
1014 const scalar_type zero = STS::zero ();
1015 const scalar_type one = STS::one ();
1016
1017 TEUCHOS_TEST_FOR_EXCEPTION
1018 (A_.is_null (), std::runtime_error, methodName << ": "
1019 "The input matrix A is null. Please call setMatrix() with a nonnull "
1020 "input matrix, then call initialize(), before calling this method.");
1021
1022 // We don't count initialization in compute() time.
1023 if (! isInitialized ()) {
1024 initialize ();
1025 }
1026
1027 if (hasBlockCrsMatrix_) {
1028 computeBlockCrs ();
1029 return;
1030 }
1031
1032 Teuchos::RCP<Teuchos::Time> timer =
1033 Teuchos::TimeMonitor::getNewCounter (methodName);
1034 double startTime = timer->wallTime();
1035
1036 { // Timing of compute starts here.
1037 Teuchos::TimeMonitor timeMon (*timer);
1038
1039 // Precompute some quantities for "fixing" zero or tiny diagonal
1040 // entries. We'll only use them if this "fixing" is enabled.
1041 //
1042 // SmallTraits covers for the lack of eps() in
1043 // Teuchos::ScalarTraits when its template parameter is not a
1044 // floating-point type. (Ifpack2 sometimes gets instantiated for
1045 // integer Scalar types.)
1046 IST oneOverMinDiagVal = KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ());
1047 if ( MinDiagonalValue_ != zero)
1048 oneOverMinDiagVal = KAT::one () / static_cast<IST> (MinDiagonalValue_);
1049
1050 // It's helpful not to have to recompute this magnitude each time.
1051 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1052
1053 const LO numMyRows = static_cast<LO> (A_->getLocalNumRows ());
1054
1055 TEUCHOS_TEST_FOR_EXCEPTION
1056 (NumSweeps_ < 0, std::logic_error, methodName
1057 << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1058 "Please report this bug to the Ifpack2 developers.");
1059 IsComputed_ = false;
1060
1061 if (Diagonal_.is_null()) {
1062 // A_->getLocalDiagCopy fills in all Vector entries, even if the
1063 // matrix has no stored entries in the corresponding rows.
1064 Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
1065 }
1066
1067 if (checkDiagEntries_) {
1068 //
1069 // Check diagonal elements, replace zero elements with the minimum
1070 // diagonal value, and store their inverses. Count the number of
1071 // "small" and zero diagonal entries while we're at it.
1072 //
1073 size_t numSmallDiagEntries = 0; // "small" includes zero
1074 size_t numZeroDiagEntries = 0; // # zero diagonal entries
1075 size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1076 magnitude_type minMagDiagEntryMag = STM::zero ();
1077 magnitude_type maxMagDiagEntryMag = STM::zero ();
1078
1079 // FIXME: We are extracting the diagonal more than once. That
1080 // isn't very efficient, but we shouldn't be checking
1081 // diagonal entries at scale anyway.
1082 A_->getLocalDiagCopy (*Diagonal_); // slow path for row matrix
1083 std::unique_ptr<vector_type> origDiag;
1084 origDiag = std::unique_ptr<vector_type> (new vector_type (*Diagonal_, Teuchos::Copy));
1085 auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1086 auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1087 // As we go, keep track of the diagonal entries with the
1088 // least and greatest magnitude. We could use the trick of
1089 // starting min with +Inf and max with -Inf, but that
1090 // doesn't work if scalar_type is a built-in integer type.
1091 // Thus, we have to start by reading the first diagonal
1092 // entry redundantly.
1093 if (numMyRows != 0) {
1094 const magnitude_type d_0_mag = KAT::abs (diag(0));
1095 minMagDiagEntryMag = d_0_mag;
1096 maxMagDiagEntryMag = d_0_mag;
1097 }
1098
1099 // Go through all the diagonal entries. Compute counts of
1100 // small-magnitude, zero, and negative-real-part entries.
1101 // Invert the diagonal entries that aren't too small. For
1102 // those too small in magnitude, replace them with
1103 // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1104 // happens to be zero).
1105 for (LO i = 0; i < numMyRows; ++i) {
1106 const IST d_i = diag(i);
1107 const magnitude_type d_i_mag = KAT::abs (d_i);
1108 // Work-around for GitHub Issue #5269.
1109 //const magnitude_type d_i_real = KAT::real (d_i);
1110 const auto d_i_real = getRealValue (d_i);
1111
1112 // We can't compare complex numbers, but we can compare their
1113 // real parts.
1114 if (d_i_real < STM::zero ()) {
1115 ++numNegDiagEntries;
1116 }
1117 if (d_i_mag < minMagDiagEntryMag) {
1118 minMagDiagEntryMag = d_i_mag;
1119 }
1120 if (d_i_mag > maxMagDiagEntryMag) {
1121 maxMagDiagEntryMag = d_i_mag;
1122 }
1123
1124 if (fixTinyDiagEntries_) {
1125 // <= not <, in case minDiagValMag is zero.
1126 if (d_i_mag <= minDiagValMag) {
1127 ++numSmallDiagEntries;
1128 if (d_i_mag == STM::zero ()) {
1129 ++numZeroDiagEntries;
1130 }
1131 diag(i) = oneOverMinDiagVal;
1132 }
1133 else {
1134 diag(i) = KAT::one () / d_i;
1135 }
1136 }
1137 else { // Don't fix zero or tiny diagonal entries.
1138 // <= not <, in case minDiagValMag is zero.
1139 if (d_i_mag <= minDiagValMag) {
1140 ++numSmallDiagEntries;
1141 if (d_i_mag == STM::zero ()) {
1142 ++numZeroDiagEntries;
1143 }
1144 }
1145 diag(i) = KAT::one () / d_i;
1146 }
1147 }
1148
1149 // Collect global data about the diagonal entries. Only do this
1150 // (which involves O(1) all-reduces) if the user asked us to do
1151 // the extra work.
1152 //
1153 // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1154 // zero rows. Fixing this requires one of two options:
1155 //
1156 // 1. Use a custom reduction operation that ignores processes
1157 // with zero diagonal entries.
1158 // 2. Split the communicator, compute all-reduces using the
1159 // subcommunicator over processes that have a nonzero number
1160 // of diagonal entries, and then broadcast from one of those
1161 // processes (if there is one) to the processes in the other
1162 // subcommunicator.
1163 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1164
1165 // Compute global min and max magnitude of entries.
1166 Array<magnitude_type> localVals (2);
1167 localVals[0] = minMagDiagEntryMag;
1168 // (- (min (- x))) is the same as (max x).
1169 localVals[1] = -maxMagDiagEntryMag;
1170 Array<magnitude_type> globalVals (2);
1171 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1172 localVals.getRawPtr (),
1173 globalVals.getRawPtr ());
1174 globalMinMagDiagEntryMag_ = globalVals[0];
1175 globalMaxMagDiagEntryMag_ = -globalVals[1];
1176
1177 Array<size_t> localCounts (3);
1178 localCounts[0] = numSmallDiagEntries;
1179 localCounts[1] = numZeroDiagEntries;
1180 localCounts[2] = numNegDiagEntries;
1181 Array<size_t> globalCounts (3);
1182 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1183 localCounts.getRawPtr (),
1184 globalCounts.getRawPtr ());
1185 globalNumSmallDiagEntries_ = globalCounts[0];
1186 globalNumZeroDiagEntries_ = globalCounts[1];
1187 globalNumNegDiagEntries_ = globalCounts[2];
1188
1189 // Compute and save the difference between the computed inverse
1190 // diagonal, and the original diagonal's inverse.
1191 vector_type diff (A_->getRowMap ());
1192 diff.reciprocal (*origDiag);
1193 diff.update (-one, *Diagonal_, one);
1194 globalDiagNormDiff_ = diff.norm2 ();
1195 }
1196
1197 // Extract the diagonal entries. The CrsMatrix graph version is
1198 // faster than the row matrix version for subsequent calls to
1199 // compute(), since it caches the diagonal offsets.
1200
1201 bool debugAgainstSlowPath = false;
1202
1203 auto crsMat = Details::getCrsMatrix(A_);
1204
1205 if (crsMat.get() && crsMat->isFillComplete ()) {
1206 // The invDiagKernel object computes diagonal offsets if
1207 // necessary. The "compute" call extracts diagonal enties,
1208 // optionally applies the L1 method and replacement of small
1209 // entries, and then inverts.
1210 if (invDiagKernel_.is_null())
1211 invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(crsMat));
1212 else
1213 invDiagKernel_->setMatrix(crsMat);
1214 invDiagKernel_->compute(*Diagonal_,
1215 DoL1Method_ && IsParallel_, L1Eta_,
1216 fixTinyDiagEntries_, minDiagValMag);
1217 savedDiagOffsets_ = true;
1218
1219 // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1220 // environment variable to control this behavior at run time.
1221#ifdef HAVE_IFPACK2_DEBUG
1222 debugAgainstSlowPath = true;
1223#endif
1224 }
1225
1226 if (crsMat.is_null() || ! crsMat->isFillComplete () || debugAgainstSlowPath) {
1227 // We could not call the CrsMatrix version above, or want to
1228 // debug by comparing against the slow path.
1229
1230 // FIXME: The L1 method in this code path runs on host, since we
1231 // don't have device access for row matrices.
1232
1233 RCP<vector_type> Diagonal;
1234 if (debugAgainstSlowPath)
1235 Diagonal = rcp(new vector_type(A_->getRowMap ()));
1236 else
1237 Diagonal = Diagonal_;
1238
1239 A_->getLocalDiagCopy (*Diagonal); // slow path for row matrix
1240
1241 // Setup for L1 Methods.
1242 // Here we add half the value of the off-processor entries in the row,
1243 // but only if diagonal isn't sufficiently large.
1244 //
1245 // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1246 // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1247 // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1248 //
1249 if (DoL1Method_ && IsParallel_) {
1250 const row_matrix_type& A_row = *A_;
1251 auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1252 const magnitude_type two = STM::one () + STM::one ();
1253 const size_t maxLength = A_row.getLocalMaxNumRowEntries ();
1254 nonconst_local_inds_host_view_type indices("indices",maxLength);
1255 nonconst_values_host_view_type values("values",maxLength);
1256 size_t numEntries;
1257
1258 for (LO i = 0; i < numMyRows; ++i) {
1259 A_row.getLocalRowCopy (i, indices, values, numEntries);
1260 magnitude_type diagonal_boost = STM::zero ();
1261 for (size_t k = 0 ; k < numEntries; ++k) {
1262 if (indices[k] >= numMyRows) {
1263 diagonal_boost += STS::magnitude (values[k] / two);
1264 }
1265 }
1266 if (KAT::magnitude (diag(i, 0)) < L1Eta_ * diagonal_boost) {
1267 diag(i, 0) += diagonal_boost;
1268 }
1269 }
1270 }
1271
1272 //
1273 // Invert the matrix's diagonal entries (in Diagonal).
1274 //
1275 if (fixTinyDiagEntries_) {
1276 // Go through all the diagonal entries. Invert those that
1277 // aren't too small in magnitude. For those that are too
1278 // small in magnitude, replace them with oneOverMinDiagVal.
1279 auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1280 Kokkos::parallel_for(Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1281 KOKKOS_LAMBDA (local_ordinal_type i) {
1282 auto d_i = localDiag(i, 0);
1283 const magnitude_type d_i_mag = KAT::magnitude (d_i);
1284 // <= not <, in case minDiagValMag is zero.
1285 if (d_i_mag <= minDiagValMag) {
1286 d_i = oneOverMinDiagVal;
1287 }
1288 else {
1289 // For Stokhos types, operator/ returns an expression
1290 // type. Explicitly convert to IST before returning.
1291 d_i = IST (KAT::one () / d_i);
1292 }
1293 localDiag(i, 0) = d_i;
1294 });
1295 }
1296 else { // don't fix tiny or zero diagonal entries
1297 Diagonal->reciprocal (*Diagonal);
1298 }
1299
1300 if (debugAgainstSlowPath) {
1301 // Validate the fast-path diagonal against the slow-path diagonal.
1302 Diagonal->update (STS::one (), *Diagonal_, -STS::one ());
1303 const magnitude_type err = Diagonal->normInf ();
1304 // The two diagonals should be exactly the same, so their
1305 // difference should be exactly zero.
1306 TEUCHOS_TEST_FOR_EXCEPTION
1307 (err > 100*STM::eps(), std::logic_error, methodName << ": "
1308 << "\"fast-path\" diagonal computation failed. "
1309 "\\|D1 - D2\\|_inf = " << err << ".");
1310 }
1311 }
1312
1313 // Count floating-point operations of computing the inverse diagonal.
1314 //
1315 // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1316 if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1317 ComputeFlops_ += 4.0 * numMyRows;
1318 }
1319 else {
1320 ComputeFlops_ += numMyRows;
1321 }
1322
1323 if (PrecType_ == Ifpack2::Details::MTGS ||
1324 PrecType_ == Ifpack2::Details::MTSGS ||
1325 PrecType_ == Ifpack2::Details::GS2 ||
1326 PrecType_ == Ifpack2::Details::SGS2) {
1327
1328 //KokkosKernels GaussSeidel Initialization.
1329 using scalar_view_t = typename local_matrix_device_type::values_type;
1330
1331 TEUCHOS_TEST_FOR_EXCEPTION
1332 (crsMat.is_null(), std::logic_error, methodName << ": "
1333 "Multithreaded Gauss-Seidel methods currently only work "
1334 "when the input matrix is a Tpetra::CrsMatrix.");
1335 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
1336
1337 //TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1338 //const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1339 auto diagView_2d = Diagonal_->getLocalViewDevice (Tpetra::Access::ReadWrite);
1340 scalar_view_t diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1341 KokkosSparse::Experimental::gauss_seidel_numeric(
1342 mtKernelHandle_.getRawPtr (),
1343 A_->getLocalNumRows (),
1344 A_->getLocalNumCols (),
1345 kcsr.graph.row_map,
1346 kcsr.graph.entries,
1347 kcsr.values,
1348 diagView_1d,
1349 is_matrix_structurally_symmetric_);
1350 }
1351 else if(PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1352 if(crsMat)
1353 serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1354 else
1355 serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1356 }
1357 } // end TimeMonitor scope
1358
1359 ComputeTime_ += (timer->wallTime() - startTime);
1360 ++NumCompute_;
1361 IsComputed_ = true;
1362}
1363
1364
1365template<class MatrixType>
1366void
1368ApplyInverseRichardson (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1369 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1370{
1371 using Teuchos::as;
1372 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1373 const double numVectors = as<double> (X.getNumVectors ());
1374 if (ZeroStartingSolution_) {
1375 // For the first Richardson sweep, if we are allowed to assume that
1376 // the initial guess is zero, then Richardson is just alpha times the RHS
1377 // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1378 Y.scale(DampingFactor_,X);
1379
1380 // Count (global) floating-point operations. Ifpack2 represents
1381 // this as a floating-point number rather than an integer, so that
1382 // overflow (for a very large number of calls, or a very large
1383 // problem) is approximate instead of catastrophic.
1384 double flopUpdate = 0.0;
1385 if (DampingFactor_ == STS::one ()) {
1386 // Y(i,j) = X(i,j): one multiply for each entry of Y.
1387 flopUpdate = numGlobalRows * numVectors;
1388 } else {
1389 // Y(i,j) = DampingFactor_ * X(i,j):
1390 // One multiplies per entry of Y.
1391 flopUpdate = numGlobalRows * numVectors;
1392 }
1393 ApplyFlops_ += flopUpdate;
1394 if (NumSweeps_ == 1) {
1395 return;
1396 }
1397 }
1398 // If we were allowed to assume that the starting guess was zero,
1399 // then we have already done the first sweep above.
1400 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1401
1402 // Allocate a multivector if the cached one isn't perfect
1403 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1404
1405 for (int j = startSweep; j < NumSweeps_; ++j) {
1406 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1407 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1408 Y.update(DampingFactor_,*cachedMV_,STS::one());
1409 }
1410
1411 // For each column of output, for each pass over the matrix:
1412 //
1413 // - One + and one * for each matrix entry
1414 // - One / and one + for each row of the matrix
1415 // - If the damping factor is not one: one * for each row of the
1416 // matrix. (It's not fair to count this if the damping factor is
1417 // one, since the implementation could skip it. Whether it does
1418 // or not is the implementation's choice.)
1419
1420 // Floating-point operations due to the damping factor, per matrix
1421 // row, per direction, per columm of output.
1422 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1423 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1424 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1425 (2.0 * numGlobalNonzeros + dampingFlops);
1426}
1427
1428
1429template<class MatrixType>
1430void
1432ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1433 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1434{
1435 using Teuchos::as;
1436 if (hasBlockCrsMatrix_) {
1437 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1438 return;
1439 }
1440
1441 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1442 const double numVectors = as<double> (X.getNumVectors ());
1443 if (ZeroStartingSolution_) {
1444 // For the first Jacobi sweep, if we are allowed to assume that
1445 // the initial guess is zero, then Jacobi is just diagonal
1446 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1447 // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1448 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1449
1450 // Count (global) floating-point operations. Ifpack2 represents
1451 // this as a floating-point number rather than an integer, so that
1452 // overflow (for a very large number of calls, or a very large
1453 // problem) is approximate instead of catastrophic.
1454 double flopUpdate = 0.0;
1455 if (DampingFactor_ == STS::one ()) {
1456 // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1457 flopUpdate = numGlobalRows * numVectors;
1458 } else {
1459 // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1460 // Two multiplies per entry of Y.
1461 flopUpdate = 2.0 * numGlobalRows * numVectors;
1462 }
1463 ApplyFlops_ += flopUpdate;
1464 if (NumSweeps_ == 1) {
1465 return;
1466 }
1467 }
1468 // If we were allowed to assume that the starting guess was zero,
1469 // then we have already done the first sweep above.
1470 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1471
1472 // Allocate a multivector if the cached one isn't perfect
1473 updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1474
1475 for (int j = startSweep; j < NumSweeps_; ++j) {
1476 // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1477 Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1478 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1479 }
1480
1481 // For each column of output, for each pass over the matrix:
1482 //
1483 // - One + and one * for each matrix entry
1484 // - One / and one + for each row of the matrix
1485 // - If the damping factor is not one: one * for each row of the
1486 // matrix. (It's not fair to count this if the damping factor is
1487 // one, since the implementation could skip it. Whether it does
1488 // or not is the implementation's choice.)
1489
1490 // Floating-point operations due to the damping factor, per matrix
1491 // row, per direction, per columm of output.
1492 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1493 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1494 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1495 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1496}
1497
1498template<class MatrixType>
1499void
1501ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1504 node_type>& X,
1505 Tpetra::MultiVector<scalar_type,
1508 node_type>& Y) const
1509{
1510 using Tpetra::BlockMultiVector;
1511 using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1513
1514 const block_crs_matrix_type* blockMatConst =
1515 dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1516 TEUCHOS_TEST_FOR_EXCEPTION
1517 (blockMatConst == nullptr, std::logic_error, "This method should "
1518 "never be called if the matrix A_ is not a BlockCrsMatrix. "
1519 "Please report this bug to the Ifpack2 developers.");
1520 // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1521 // This is because applyBlock() is nonconst (more accurate), while
1522 // apply() is const (required by Tpetra::Operator interface, but a
1523 // lie, because it possibly allocates temporary buffers).
1524 block_crs_matrix_type* blockMat =
1525 const_cast<block_crs_matrix_type*> (blockMatConst);
1526
1527 auto meshRowMap = blockMat->getRowMap ();
1528 auto meshColMap = blockMat->getColMap ();
1529 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1530
1531 BMV X_blk (X, *meshColMap, blockSize);
1532 BMV Y_blk (Y, *meshRowMap, blockSize);
1533
1534 if (ZeroStartingSolution_) {
1535 // For the first sweep, if we are allowed to assume that the
1536 // initial guess is zero, then block Jacobi is just block diagonal
1537 // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1538 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1539 if (NumSweeps_ == 1) {
1540 return;
1541 }
1542 }
1543
1544 auto pointRowMap = Y.getMap ();
1545 const size_t numVecs = X.getNumVectors ();
1546
1547 // We don't need to initialize the result MV, since the sparse
1548 // mat-vec will clobber its contents anyway.
1549 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1550
1551 // If we were allowed to assume that the starting guess was zero,
1552 // then we have already done the first sweep above.
1553 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1554
1555 for (int j = startSweep; j < NumSweeps_; ++j) {
1556 blockMat->applyBlock (Y_blk, A_times_Y);
1557 // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1558 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1559 X_blk, A_times_Y, STS::one ());
1560 }
1561}
1562
1563template<class MatrixType>
1564void
1566ApplyInverseSerialGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1567 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1568 Tpetra::ESweepDirection direction) const
1569{
1570 using this_type = Relaxation<MatrixType>;
1571 // The CrsMatrix version is faster, because it can access the sparse
1572 // matrix data directly, rather than by copying out each row's data
1573 // in turn. Thus, we check whether the RowMatrix is really a
1574 // CrsMatrix.
1575 //
1576 // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1577 // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1578 // will still be correct if the cast fails, but it will use an
1579 // unoptimized kernel.
1580 auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
1581 auto crsMat = Details::getCrsMatrix(A_);
1582 if (blockCrsMat.get()) {
1583 const_cast<this_type&> (*this).ApplyInverseSerialGS_BlockCrsMatrix (*blockCrsMat, X, Y, direction);
1584 }
1585 else if (crsMat.get()) {
1586 ApplyInverseSerialGS_CrsMatrix (*crsMat, X, Y, direction);
1587 }
1588 else {
1589 ApplyInverseSerialGS_RowMatrix (X, Y, direction);
1590 }
1591}
1592
1593
1594template<class MatrixType>
1595void
1597ApplyInverseSerialGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1598 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1599 Tpetra::ESweepDirection direction) const {
1600 using Teuchos::Array;
1601 using Teuchos::ArrayRCP;
1602 using Teuchos::ArrayView;
1603 using Teuchos::as;
1604 using Teuchos::RCP;
1605 using Teuchos::rcp;
1606 using Teuchos::rcpFromRef;
1607 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1608
1609 // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1610 // starting multivector itself. The generic RowMatrix version here
1611 // does not, so we have to zero out Y here.
1612 if (ZeroStartingSolution_) {
1613 Y.putScalar (STS::zero ());
1614 }
1615
1616 size_t NumVectors = X.getNumVectors();
1617
1618 RCP<MV> Y2;
1619 if (IsParallel_) {
1620 if (Importer_.is_null ()) { // domain and column Maps are the same.
1621 updateCachedMultiVector (Y.getMap (), NumVectors);
1622 }
1623 else {
1624 updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1625 }
1626 Y2 = cachedMV_;
1627 }
1628 else {
1629 Y2 = rcpFromRef (Y);
1630 }
1631
1632 for (int j = 0; j < NumSweeps_; ++j) {
1633 // data exchange is here, once per sweep
1634 if (IsParallel_) {
1635 if (Importer_.is_null ()) {
1636 // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1637 // clear if this is correct. Reevaluate at some point.
1638
1639 *Y2 = Y; // just copy, since domain and column Maps are the same
1640 } else {
1641 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1642 }
1643 }
1644 serialGaussSeidel_->apply(*Y2, X, direction);
1645
1646 // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1647 if (IsParallel_) {
1648 Tpetra::deep_copy (Y, *Y2);
1649 }
1650 }
1651
1652 // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1653 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1654 const double numVectors = as<double> (X.getNumVectors ());
1655 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1656 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1657 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1658 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1659}
1660
1661
1662template<class MatrixType>
1663void
1665ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1666 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1667 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1668 Tpetra::ESweepDirection direction) const
1669{
1670 using Teuchos::null;
1671 using Teuchos::RCP;
1672 using Teuchos::rcp;
1673 using Teuchos::rcpFromRef;
1674 using Teuchos::rcp_const_cast;
1675 typedef scalar_type Scalar;
1676 const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1677 const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1678
1679 TEUCHOS_TEST_FOR_EXCEPTION(
1680 ! A.isFillComplete (), std::runtime_error,
1681 prefix << "The matrix is not fill complete.");
1682 TEUCHOS_TEST_FOR_EXCEPTION(
1683 NumSweeps_ < 0, std::invalid_argument,
1684 prefix << "The number of sweeps must be nonnegative, "
1685 "but you provided numSweeps = " << NumSweeps_ << " < 0.");
1686
1687 if (NumSweeps_ == 0) {
1688 return;
1689 }
1690
1691 RCP<const import_type> importer = A.getGraph ()->getImporter ();
1692
1693 RCP<const map_type> domainMap = A.getDomainMap ();
1694 RCP<const map_type> rangeMap = A.getRangeMap ();
1695 RCP<const map_type> rowMap = A.getGraph ()->getRowMap ();
1696 RCP<const map_type> colMap = A.getGraph ()->getColMap ();
1697
1698#ifdef HAVE_IFPACK2_DEBUG
1699 {
1700 // The relation 'isSameAs' is transitive. It's also a
1701 // collective, so we don't have to do a "shared" test for
1702 // exception (i.e., a global reduction on the test value).
1703 TEUCHOS_TEST_FOR_EXCEPTION(
1704 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1705 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1706 "multivector X be in the domain Map of the matrix.");
1707 TEUCHOS_TEST_FOR_EXCEPTION(
1708 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1709 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1710 "B be in the range Map of the matrix.");
1711 TEUCHOS_TEST_FOR_EXCEPTION(
1712 ! Diagonal_->getMap ()->isSameAs (*rowMap), std::runtime_error,
1713 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1714 "D be in the row Map of the matrix.");
1715 TEUCHOS_TEST_FOR_EXCEPTION(
1716 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1717 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1718 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1719 TEUCHOS_TEST_FOR_EXCEPTION(
1720 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1721 "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1722 "the range Map of the matrix be the same.");
1723 }
1724#endif
1725
1726 // Fetch a (possibly cached) temporary column Map multivector
1727 // X_colMap, and a domain Map view X_domainMap of it. Both have
1728 // constant stride by construction. We know that the domain Map
1729 // must include the column Map, because our Gauss-Seidel kernel
1730 // requires that the row Map, domain Map, and range Map are all
1731 // the same, and that each process owns all of its own diagonal
1732 // entries of the matrix.
1733
1734 RCP<multivector_type> X_colMap;
1735 RCP<multivector_type> X_domainMap;
1736 bool copyBackOutput = false;
1737 if (importer.is_null ()) {
1738 X_colMap = Teuchos::rcpFromRef (X);
1739 X_domainMap = Teuchos::rcpFromRef (X);
1740 // Column Map and domain Map are the same, so there are no
1741 // remote entries. Thus, if we are not setting the initial
1742 // guess to zero, we don't have to worry about setting remote
1743 // entries to zero, even though we are not doing an Import in
1744 // this case.
1745 if (ZeroStartingSolution_) {
1746 X_colMap->putScalar (ZERO);
1747 }
1748 // No need to copy back to X at end.
1749 }
1750 else { // Column Map and domain Map are _not_ the same.
1751 updateCachedMultiVector(colMap, X.getNumVectors());
1752 X_colMap = cachedMV_;
1753 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1754
1755 if (ZeroStartingSolution_) {
1756 // No need for an Import, since we're filling with zeros.
1757 X_colMap->putScalar (ZERO);
1758 } else {
1759 // We could just copy X into X_domainMap. However, that
1760 // wastes a copy, because the Import also does a copy (plus
1761 // communication). Since the typical use case for
1762 // Gauss-Seidel is a small number of sweeps (2 is typical), we
1763 // don't want to waste that copy. Thus, we do the Import
1764 // here, and skip the first Import in the first sweep.
1765 // Importing directly from X effects the copy into X_domainMap
1766 // (which is a view of X_colMap).
1767 X_colMap->doImport (X, *importer, Tpetra::INSERT);
1768 }
1769 copyBackOutput = true; // Don't forget to copy back at end.
1770 } // if column and domain Maps are (not) the same
1771
1772 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1773 if (! importer.is_null () && sweep > 0) {
1774 // We already did the first Import for the zeroth sweep above,
1775 // if it was necessary.
1776 X_colMap->doImport (*X_domainMap, *importer, Tpetra::INSERT);
1777 }
1778 // Do local Gauss-Seidel (forward, backward or symmetric)
1779 serialGaussSeidel_->apply(*X_colMap, B, direction);
1780 }
1781
1782 if (copyBackOutput) {
1783 try {
1784 deep_copy (X , *X_domainMap); // Copy result back into X.
1785 } catch (std::exception& e) {
1786 TEUCHOS_TEST_FOR_EXCEPTION(
1787 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1788 "threw an exception: " << e.what ());
1789 }
1790 }
1791
1792 // For each column of output, for each sweep over the matrix:
1793 //
1794 // - One + and one * for each matrix entry
1795 // - One / and one + for each row of the matrix
1796 // - If the damping factor is not one: one * for each row of the
1797 // matrix. (It's not fair to count this if the damping factor is
1798 // one, since the implementation could skip it. Whether it does
1799 // or not is the implementation's choice.)
1800
1801 // Floating-point operations due to the damping factor, per matrix
1802 // row, per direction, per columm of output.
1803 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1804 const double numVectors = X.getNumVectors ();
1805 const double numGlobalRows = A_->getGlobalNumRows ();
1806 const double numGlobalNonzeros = A_->getGlobalNumEntries ();
1807 ApplyFlops_ += NumSweeps_ * numVectors *
1808 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1809}
1810
1811template<class MatrixType>
1812void
1814ApplyInverseSerialGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1815 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1816 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1817 Tpetra::ESweepDirection direction)
1818{
1819 using Tpetra::INSERT;
1820 using Teuchos::RCP;
1821 using Teuchos::rcp;
1822 using Teuchos::rcpFromRef;
1823
1824 //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1825 //
1826 // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1827 // multiple right-hand sides, unless the input or output MultiVector
1828 // does not have constant stride. We should check for that case
1829 // here, in case it doesn't work in localGaussSeidel (which is
1830 // entirely possible).
1831 block_multivector_type yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1832 const block_multivector_type xBlock(X, *(A.getColMap ()), A.getBlockSize());
1833
1834 bool performImport = false;
1835 RCP<block_multivector_type> yBlockCol;
1836 if (Importer_.is_null()) {
1837 yBlockCol = rcpFromRef(yBlock);
1838 }
1839 else {
1840 if (yBlockColumnPointMap_.is_null() ||
1841 yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1842 yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1843 yBlockColumnPointMap_ =
1844 rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1845 static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1846 }
1847 yBlockCol = yBlockColumnPointMap_;
1848 if (pointImporter_.is_null()) {
1849 auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1850 auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1851 pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1852 }
1853 performImport = true;
1854 }
1855
1856 multivector_type yBlock_mv;
1857 multivector_type yBlockCol_mv;
1858 RCP<const multivector_type> yBlockColPointDomain;
1859 if (performImport) { // create views (shallow copies)
1860 yBlock_mv = yBlock.getMultiVectorView();
1861 yBlockCol_mv = yBlockCol->getMultiVectorView();
1862 yBlockColPointDomain =
1863 yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1864 }
1865
1866 if (ZeroStartingSolution_) {
1867 yBlockCol->putScalar(STS::zero ());
1868 }
1869 else if (performImport) {
1870 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1871 }
1872
1873 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1874 if (performImport && sweep > 0) {
1875 yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1876 }
1877 serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1878 if (performImport) {
1879 Tpetra::deep_copy(Y, *yBlockColPointDomain);
1880 }
1881 }
1882}
1883
1884template<class MatrixType>
1885void
1888 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1889 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1890 Tpetra::ESweepDirection direction) const
1891{
1892 using Teuchos::null;
1893 using Teuchos::RCP;
1894 using Teuchos::rcp;
1895 using Teuchos::rcpFromRef;
1896 using Teuchos::rcp_const_cast;
1897 using Teuchos::as;
1898
1899 typedef scalar_type Scalar;
1900
1901 const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1902 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1903
1904 auto crsMat = Details::getCrsMatrix(A_);
1905 TEUCHOS_TEST_FOR_EXCEPTION
1906 (crsMat.is_null(), std::logic_error, "Ifpack2::Relaxation::apply: "
1907 "Multithreaded Gauss-Seidel methods currently only work when the "
1908 "input matrix is a Tpetra::CrsMatrix.");
1909
1910 //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1911 TEUCHOS_TEST_FOR_EXCEPTION
1912 (! localSmoothingIndices_.is_null (), std::logic_error,
1913 "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1914 "implement the case where the user supplies an iteration order. "
1915 "This error used to appear as \"MT GaussSeidel ignores the given "
1916 "order\". "
1917 "I tried to add more explanation, but I didn't implement \"MT "
1918 "GaussSeidel\" [sic]. "
1919 "You'll have to ask the person who did.");
1920
1921 TEUCHOS_TEST_FOR_EXCEPTION
1922 (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1923 "input CrsMatrix is not fill complete. Please call fillComplete "
1924 "on the matrix, then do setup again, before calling apply(). "
1925 "\"Do setup\" means that if only the matrix's values have changed "
1926 "since last setup, you need only call compute(). If the matrix's "
1927 "structure may also have changed, you must first call initialize(), "
1928 "then call compute(). If you have not set up this preconditioner "
1929 "for this matrix before, you must first call initialize(), then "
1930 "call compute().");
1931 TEUCHOS_TEST_FOR_EXCEPTION
1932 (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1933 << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1934 "developers.");
1935 if (NumSweeps_ == 0) {
1936 return;
1937 }
1938
1939 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1940 TEUCHOS_TEST_FOR_EXCEPTION(
1941 ! crsMat->getGraph ()->getExporter ().is_null (), std::runtime_error,
1942 "This method's implementation currently requires that the matrix's row, "
1943 "domain, and range Maps be the same. This cannot be the case, because "
1944 "the matrix has a nontrivial Export object.");
1945
1946 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1947 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1948 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1949 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1950
1951#ifdef HAVE_IFPACK2_DEBUG
1952 {
1953 // The relation 'isSameAs' is transitive. It's also a
1954 // collective, so we don't have to do a "shared" test for
1955 // exception (i.e., a global reduction on the test value).
1956 TEUCHOS_TEST_FOR_EXCEPTION(
1957 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1958 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1959 "multivector X be in the domain Map of the matrix.");
1960 TEUCHOS_TEST_FOR_EXCEPTION(
1961 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1962 "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1963 "B be in the range Map of the matrix.");
1964 TEUCHOS_TEST_FOR_EXCEPTION(
1965 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1966 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1967 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1968 TEUCHOS_TEST_FOR_EXCEPTION(
1969 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1970 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1971 "the range Map of the matrix be the same.");
1972 }
1973#else
1974 // Forestall any compiler warnings for unused variables.
1975 (void) rangeMap;
1976 (void) rowMap;
1977#endif // HAVE_IFPACK2_DEBUG
1978
1979 // Fetch a (possibly cached) temporary column Map multivector
1980 // X_colMap, and a domain Map view X_domainMap of it. Both have
1981 // constant stride by construction. We know that the domain Map
1982 // must include the column Map, because our Gauss-Seidel kernel
1983 // requires that the row Map, domain Map, and range Map are all
1984 // the same, and that each process owns all of its own diagonal
1985 // entries of the matrix.
1986
1987 RCP<multivector_type> X_colMap;
1988 RCP<multivector_type> X_domainMap;
1989 bool copyBackOutput = false;
1990 if (importer.is_null ()) {
1991 if (X.isConstantStride ()) {
1992 X_colMap = rcpFromRef (X);
1993 X_domainMap = rcpFromRef (X);
1994
1995 // Column Map and domain Map are the same, so there are no
1996 // remote entries. Thus, if we are not setting the initial
1997 // guess to zero, we don't have to worry about setting remote
1998 // entries to zero, even though we are not doing an Import in
1999 // this case.
2000 if (ZeroStartingSolution_) {
2001 X_colMap->putScalar (ZERO);
2002 }
2003 // No need to copy back to X at end.
2004 }
2005 else {
2006 // We must copy X into a constant stride multivector.
2007 // Just use the cached column Map multivector for that.
2008 // force=true means fill with zeros, so no need to fill
2009 // remote entries (not in domain Map) with zeros.
2010 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2011 X_colMap = cachedMV_;
2012 // X_domainMap is always a domain Map view of the column Map
2013 // multivector. In this case, the domain and column Maps are
2014 // the same, so X_domainMap _is_ X_colMap.
2015 X_domainMap = X_colMap;
2016 if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
2017 try {
2018 deep_copy (*X_domainMap , X); // Copy X into constant stride MV
2019 } catch (std::exception& e) {
2020 std::ostringstream os;
2021 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2022 "deep_copy(*X_domainMap, X) threw an exception: "
2023 << e.what () << ".";
2024 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2025 }
2026 }
2027 copyBackOutput = true; // Don't forget to copy back at end.
2028 /*
2029 TPETRA_EFFICIENCY_WARNING(
2030 ! X.isConstantStride (),
2031 std::runtime_error,
2032 "MTGaussSeidel: The current implementation of the Gauss-Seidel "
2033 "kernel requires that X and B both have constant stride. Since X "
2034 "does not have constant stride, we had to make a copy. This is a "
2035 "limitation of the current implementation and not your fault, but we "
2036 "still report it as an efficiency warning for your information.");
2037 */
2038 }
2039 }
2040 else { // Column Map and domain Map are _not_ the same.
2041 updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2042 X_colMap = cachedMV_;
2043
2044 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2045
2046 if (ZeroStartingSolution_) {
2047 // No need for an Import, since we're filling with zeros.
2048 X_colMap->putScalar (ZERO);
2049 } else {
2050 // We could just copy X into X_domainMap. However, that
2051 // wastes a copy, because the Import also does a copy (plus
2052 // communication). Since the typical use case for
2053 // Gauss-Seidel is a small number of sweeps (2 is typical), we
2054 // don't want to waste that copy. Thus, we do the Import
2055 // here, and skip the first Import in the first sweep.
2056 // Importing directly from X effects the copy into X_domainMap
2057 // (which is a view of X_colMap).
2058 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2059 }
2060 copyBackOutput = true; // Don't forget to copy back at end.
2061 } // if column and domain Maps are (not) the same
2062
2063 // The Gauss-Seidel / SOR kernel expects multivectors of constant
2064 // stride. X_colMap is by construction, but B might not be. If
2065 // it's not, we have to make a copy.
2066 RCP<const multivector_type> B_in;
2067 if (B.isConstantStride ()) {
2068 B_in = rcpFromRef (B);
2069 }
2070 else {
2071 // Range Map and row Map are the same in this case, so we can
2072 // use the cached row Map multivector to store a constant stride
2073 // copy of B.
2074 RCP<multivector_type> B_in_nonconst = rcp (new multivector_type(rowMap, B.getNumVectors()));
2075 try {
2076 deep_copy (*B_in_nonconst, B);
2077 } catch (std::exception& e) {
2078 std::ostringstream os;
2079 os << "Ifpack2::Relaxation::MTGaussSeidel: "
2080 "deep_copy(*B_in_nonconst, B) threw an exception: "
2081 << e.what () << ".";
2082 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2083 }
2084 B_in = rcp_const_cast<const multivector_type> (B_in_nonconst);
2085
2086 /*
2087 TPETRA_EFFICIENCY_WARNING(
2088 ! B.isConstantStride (),
2089 std::runtime_error,
2090 "MTGaussSeidel: The current implementation requires that B have "
2091 "constant stride. Since B does not have constant stride, we had to "
2092 "copy it into a separate constant-stride multivector. This is a "
2093 "limitation of the current implementation and not your fault, but we "
2094 "still report it as an efficiency warning for your information.");
2095 */
2096 }
2097
2098 local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
2099
2100 bool update_y_vector = true;
2101 //false as it was done up already, and we dont want to zero it in each sweep.
2102 bool zero_x_vector = false;
2103
2104 for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2105 if (! importer.is_null () && sweep > 0) {
2106 // We already did the first Import for the zeroth sweep above,
2107 // if it was necessary.
2108 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2109 }
2110
2111 if (direction == Tpetra::Symmetric) {
2112 KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2113 (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2114 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2115 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2116 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2117 zero_x_vector, update_y_vector, DampingFactor_, 1);
2118 }
2119 else if (direction == Tpetra::Forward) {
2120 KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2121 (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2122 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2123 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2124 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2125 zero_x_vector, update_y_vector, DampingFactor_, 1);
2126 }
2127 else if (direction == Tpetra::Backward) {
2128 KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2129 (mtKernelHandle_.getRawPtr(), A_->getLocalNumRows(), A_->getLocalNumCols(),
2130 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2131 X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2132 B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2133 zero_x_vector, update_y_vector, DampingFactor_, 1);
2134 }
2135 else {
2136 TEUCHOS_TEST_FOR_EXCEPTION(
2137 true, std::invalid_argument,
2138 prefix << "The 'direction' enum does not have any of its valid "
2139 "values: Forward, Backward, or Symmetric.");
2140 }
2141 update_y_vector = false;
2142 }
2143
2144 if (copyBackOutput) {
2145 try {
2146 deep_copy (X , *X_domainMap); // Copy result back into X.
2147 } catch (std::exception& e) {
2148 TEUCHOS_TEST_FOR_EXCEPTION(
2149 true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2150 "threw an exception: " << e.what ());
2151 }
2152 }
2153
2154 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2155 const double numVectors = as<double> (X.getNumVectors ());
2156 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2157 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2158 double ApplyFlops = NumSweeps_ * numVectors *
2159 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2160 if (direction == Tpetra::Symmetric)
2161 ApplyFlops *= 2.0;
2162 ApplyFlops_ += ApplyFlops;
2163}
2164
2165template<class MatrixType>
2167{
2168 std::ostringstream os;
2169
2170 // Output is a valid YAML dictionary in flow style. If you don't
2171 // like everything on a single line, you should call describe()
2172 // instead.
2173 os << "\"Ifpack2::Relaxation\": {";
2174
2175 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2176 << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2177
2178 // It's useful to print this instance's relaxation method (Jacobi,
2179 // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2180 // than that, call describe() instead.
2181 os << "Type: ";
2182 if (PrecType_ == Ifpack2::Details::JACOBI) {
2183 os << "Jacobi";
2184 } else if (PrecType_ == Ifpack2::Details::GS) {
2185 os << "Gauss-Seidel";
2186 } else if (PrecType_ == Ifpack2::Details::SGS) {
2187 os << "Symmetric Gauss-Seidel";
2188 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2189 os << "MT Gauss-Seidel";
2190 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2191 os << "MT Symmetric Gauss-Seidel";
2192 } else if (PrecType_ == Ifpack2::Details::GS2) {
2193 os << "Two-stage Gauss-Seidel";
2194 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2195 os << "Two-stage Symmetric Gauss-Seidel";
2196 }
2197 else {
2198 os << "INVALID";
2199 }
2200 if(hasBlockCrsMatrix_)
2201 os<<", BlockCrs";
2202
2203 os << ", " << "sweeps: " << NumSweeps_ << ", "
2204 << "damping factor: " << DampingFactor_ << ", ";
2205
2206 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2207 os << "\"relaxation: mtgs cluster size\": " << clusterSize_ << ", ";
2208 os << "\"relaxation: long row threshold\": " << longRowThreshold_ << ", ";
2209 os << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << ", ";
2210 os << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2211 switch(mtColoringAlgorithm_)
2212 {
2213 case KokkosGraph::COLORING_DEFAULT:
2214 os << "DEFAULT"; break;
2215 case KokkosGraph::COLORING_SERIAL:
2216 os << "SERIAL"; break;
2217 case KokkosGraph::COLORING_VB:
2218 os << "VB"; break;
2219 case KokkosGraph::COLORING_VBBIT:
2220 os << "VBBIT"; break;
2221 case KokkosGraph::COLORING_VBCS:
2222 os << "VBCS"; break;
2223 case KokkosGraph::COLORING_VBD:
2224 os << "VBD"; break;
2225 case KokkosGraph::COLORING_VBDBIT:
2226 os << "VBDBIT"; break;
2227 case KokkosGraph::COLORING_EB:
2228 os << "EB"; break;
2229 default:
2230 os << "*Invalid*";
2231 }
2232 os << ", ";
2233 }
2234
2235 if (PrecType_ == Ifpack2::Details::GS2 ||
2236 PrecType_ == Ifpack2::Details::SGS2) {
2237 os << "outer sweeps: " << NumOuterSweeps_ << ", "
2238 << "inner sweeps: " << NumInnerSweeps_ << ", "
2239 << "inner damping factor: " << InnerDampingFactor_ << ", ";
2240 }
2241
2242 if (DoL1Method_) {
2243 os << "use l1: " << DoL1Method_ << ", "
2244 << "l1 eta: " << L1Eta_ << ", ";
2245 }
2246
2247 if (A_.is_null ()) {
2248 os << "Matrix: null";
2249 }
2250 else {
2251 os << "Global matrix dimensions: ["
2252 << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2253 << ", Global nnz: " << A_->getGlobalNumEntries();
2254 }
2255
2256 os << "}";
2257 return os.str ();
2258}
2259
2260
2261template<class MatrixType>
2262void
2264describe (Teuchos::FancyOStream &out,
2265 const Teuchos::EVerbosityLevel verbLevel) const
2266{
2267 using Teuchos::OSTab;
2268 using Teuchos::TypeNameTraits;
2269 using Teuchos::VERB_DEFAULT;
2270 using Teuchos::VERB_NONE;
2271 using Teuchos::VERB_LOW;
2272 using Teuchos::VERB_MEDIUM;
2273 using Teuchos::VERB_HIGH;
2274 using Teuchos::VERB_EXTREME;
2275 using std::endl;
2276
2277 const Teuchos::EVerbosityLevel vl =
2278 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2279
2280 const int myRank = this->getComm ()->getRank ();
2281
2282 // none: print nothing
2283 // low: print O(1) info from Proc 0
2284 // medium:
2285 // high:
2286 // extreme:
2287
2288 if (vl != VERB_NONE && myRank == 0) {
2289 // Describable interface asks each implementation to start with a tab.
2290 OSTab tab1 (out);
2291 // Output is valid YAML; hence the quotes, to protect the colons.
2292 out << "\"Ifpack2::Relaxation\":" << endl;
2293 OSTab tab2 (out);
2294 out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2295 << endl;
2296 if (this->getObjectLabel () != "") {
2297 out << "Label: " << this->getObjectLabel () << endl;
2298 }
2299 out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2300 << "Computed: " << (isComputed () ? "true" : "false") << endl
2301 << "Parameters: " << endl;
2302 {
2303 OSTab tab3 (out);
2304 out << "\"relaxation: type\": ";
2305 if (PrecType_ == Ifpack2::Details::JACOBI) {
2306 out << "Jacobi";
2307 } else if (PrecType_ == Ifpack2::Details::GS) {
2308 out << "Gauss-Seidel";
2309 } else if (PrecType_ == Ifpack2::Details::SGS) {
2310 out << "Symmetric Gauss-Seidel";
2311 } else if (PrecType_ == Ifpack2::Details::MTGS) {
2312 out << "MT Gauss-Seidel";
2313 } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2314 out << "MT Symmetric Gauss-Seidel";
2315 } else if (PrecType_ == Ifpack2::Details::GS2) {
2316 out << "Two-stage Gauss-Seidel";
2317 } else if (PrecType_ == Ifpack2::Details::SGS2) {
2318 out << "Two-stage Symmetric Gauss-Seidel";
2319 } else {
2320 out << "INVALID";
2321 }
2322 // We quote these parameter names because they contain colons.
2323 // YAML uses the colon to distinguish key from value.
2324 out << endl
2325 << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2326 << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2327 << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2328 << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2329 << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2330 << "\"relaxation: use l1\": " << DoL1Method_ << endl
2331 << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2332 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
2333 out << "\"relaxation: mtgs cluster size\": " << clusterSize_ << endl;
2334 out << "\"relaxation: long row threshold\": " << longRowThreshold_ << endl;
2335 out << "\"relaxation: symmetric matrix structure\": " << (is_matrix_structurally_symmetric_ ? "true" : "false") << endl;
2336 out << "\"relaxation: relaxation: mtgs coloring algorithm\": ";
2337 switch(mtColoringAlgorithm_)
2338 {
2339 case KokkosGraph::COLORING_DEFAULT:
2340 out << "DEFAULT"; break;
2341 case KokkosGraph::COLORING_SERIAL:
2342 out << "SERIAL"; break;
2343 case KokkosGraph::COLORING_VB:
2344 out << "VB"; break;
2345 case KokkosGraph::COLORING_VBBIT:
2346 out << "VBBIT"; break;
2347 case KokkosGraph::COLORING_VBCS:
2348 out << "VBCS"; break;
2349 case KokkosGraph::COLORING_VBD:
2350 out << "VBD"; break;
2351 case KokkosGraph::COLORING_VBDBIT:
2352 out << "VBDBIT"; break;
2353 case KokkosGraph::COLORING_EB:
2354 out << "EB"; break;
2355 default:
2356 out << "*Invalid*";
2357 }
2358 out << endl;
2359 }
2360 if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2361 out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2362 out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2363 out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2364 }
2365 }
2366 out << "Computed quantities:" << endl;
2367 {
2368 OSTab tab3 (out);
2369 out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2370 << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2371 }
2372 if (checkDiagEntries_ && isComputed ()) {
2373 out << "Properties of input diagonal entries:" << endl;
2374 {
2375 OSTab tab3 (out);
2376 out << "Magnitude of minimum-magnitude diagonal entry: "
2377 << globalMinMagDiagEntryMag_ << endl
2378 << "Magnitude of maximum-magnitude diagonal entry: "
2379 << globalMaxMagDiagEntryMag_ << endl
2380 << "Number of diagonal entries with small magnitude: "
2381 << globalNumSmallDiagEntries_ << endl
2382 << "Number of zero diagonal entries: "
2383 << globalNumZeroDiagEntries_ << endl
2384 << "Number of diagonal entries with negative real part: "
2385 << globalNumNegDiagEntries_ << endl
2386 << "Abs 2-norm diff between computed and actual inverse "
2387 << "diagonal: " << globalDiagNormDiff_ << endl;
2388 }
2389 }
2390 if (isComputed ()) {
2391 out << "Saved diagonal offsets: "
2392 << (savedDiagOffsets_ ? "true" : "false") << endl;
2393 }
2394 out << "Call counts and total times (in seconds): " << endl;
2395 {
2396 OSTab tab3 (out);
2397 out << "initialize: " << endl;
2398 {
2399 OSTab tab4 (out);
2400 out << "Call count: " << NumInitialize_ << endl;
2401 out << "Total time: " << InitializeTime_ << endl;
2402 }
2403 out << "compute: " << endl;
2404 {
2405 OSTab tab4 (out);
2406 out << "Call count: " << NumCompute_ << endl;
2407 out << "Total time: " << ComputeTime_ << endl;
2408 }
2409 out << "apply: " << endl;
2410 {
2411 OSTab tab4 (out);
2412 out << "Call count: " << NumApply_ << endl;
2413 out << "Total time: " << ApplyTime_ << endl;
2414 }
2415 }
2416 }
2417}
2418
2419
2420} // namespace Ifpack2
2421
2422#define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2423 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2424
2425#endif // IFPACK2_RELAXATION_DEF_HPP
File for utility functions.
Compute scaled damped residual for Chebyshev.
Definition Ifpack2_Details_InverseDiagonalKernel_decl.hpp:45
Diagonal preconditioner.
Definition Ifpack2_Diagonal_decl.hpp:47
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition Ifpack2_Relaxation_decl.hpp:217
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition Ifpack2_Relaxation_def.hpp:183
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:229
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition Ifpack2_Relaxation_def.hpp:441
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:232
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition Ifpack2_Relaxation_def.hpp:451
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_Relaxation_def.hpp:471
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:530
int getNumCompute() const
Total number of calls to compute().
Definition Ifpack2_Relaxation_def.hpp:506
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:462
void compute()
Compute the preconditioner ("numeric setup");.
Definition Ifpack2_Relaxation_def.hpp:994
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition Ifpack2_Relaxation_def.hpp:696
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:524
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition Ifpack2_Relaxation_def.hpp:2264
std::string description() const
A simple one-line description of this object.
Definition Ifpack2_Relaxation_def.hpp:2166
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition Ifpack2_Relaxation_def.hpp:195
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:518
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Relaxation_def.hpp:162
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:223
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:562
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Relaxation_decl.hpp:226
bool isComputed() const
Return true if compute() has been called.
Definition Ifpack2_Relaxation_decl.hpp:395
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition Ifpack2_Relaxation_def.hpp:675
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Relaxation_decl.hpp:242
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Relaxation_decl.hpp:238
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_Relaxation_def.hpp:549
int getNumApply() const
Total number of calls to apply().
Definition Ifpack2_Relaxation_def.hpp:512
int getNumInitialize() const
Total number of calls to initialize().
Definition Ifpack2_Relaxation_def.hpp:500
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition Ifpack2_Relaxation_def.hpp:536
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition Ifpack2_Relaxation_def.hpp:494
bool isInitialized() const
Returns true if the preconditioner has been successfully initialized.
Definition Ifpack2_Relaxation_decl.hpp:387
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_Relaxation_def.hpp:484
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition Ifpack2_Relaxation_def.hpp:542
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:41
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition Ifpack2_Parameters.cpp:18