Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_LocalSparseTriangularSolver_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
11#define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
12
13#include <sstream> // ostringstream
14#include <stdexcept> // runtime_error
15
16#include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
17#include "Tpetra_CrsMatrix.hpp"
18#include "Tpetra_Core.hpp"
19#include "Teuchos_StandardParameterEntryValidators.hpp"
20#include "Tpetra_Details_determineLocalTriangularStructure.hpp"
21#include "KokkosSparse_trsv.hpp"
22
23#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
24# include "shylu_hts.hpp"
25#endif
26
27namespace Ifpack2 {
28
29namespace Details {
30
31#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
32
33inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name,
34 const char* file, const int line) {
35 std::ostringstream out;
36#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION)
37 out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus);
38#else
39 out << name << " error( ";
40 switch (cusparseStatus) {
41 case CUSPARSE_STATUS_NOT_INITIALIZED:
42 out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not "
43 "created correctly.";
44 break;
45 case CUSPARSE_STATUS_ALLOC_FAILED:
46 out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too "
47 "much memory";
48 break;
49 case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break;
50 case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break;
51 case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break;
52 case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break;
53 case CUSPARSE_STATUS_INTERNAL_ERROR: out << "CUSPARSE_STATUS_INTERNAL_ERROR)"; break;
54 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break;
55 case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break;
56 default: out << "unrecognized error code): this is bad!"; break;
57 }
58#endif // CUSPARSE_VERSION
59 if (file) {
60 out << " " << file << ":" << line;
61 }
62 throw std::runtime_error(out.str());
63}
64
65inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr,
66 const int line = 0) {
67 if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) {
68 cusparse_error_throw(cusparseStatus, name, file, line);
69 }
70}
71
72#define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \
73 Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__)
74
75#endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
76
77struct TrisolverType {
78 enum Enum {
79 Internal,
80 HTS,
81 KSPTRSV
82 };
83
84 static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
85 type_strs.resize(3);
86 type_strs[0] = "Internal";
87 type_strs[1] = "HTS";
88 type_strs[2] = "KSPTRSV";
89 type_enums.resize(3);
90 type_enums[0] = Internal;
91 type_enums[1] = HTS;
92 type_enums[2] = KSPTRSV;
93 }
94};
95}
96
97template<class MatrixType>
98class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
99public:
100 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
101
102 void reset () {
103#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
104 Timpl_ = Teuchos::null;
105 levelset_block_size_ = 1;
106#endif
107 }
108
109 void setParameters (const Teuchos::ParameterList& pl) {
110 (void)pl;
111#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
112 const char* block_size_s = "trisolver: block size";
113 if (pl.isParameter(block_size_s)) {
114 TEUCHOS_TEST_FOR_EXCEPT_MSG( ! pl.isType<int>(block_size_s),
115 "The parameter \"" << block_size_s << "\" must be of type int.");
116 levelset_block_size_ = pl.get<int>(block_size_s);
117 }
118 if (levelset_block_size_ < 1)
119 levelset_block_size_ = 1;
120#endif
121 }
122
123 // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
124 // call to compute() will trigger the symbolic+numeric phase, and subsequent
125 // calls (with the same Timpl_) will trigger the numeric phase. In the call to
126 // initialize(), essentially nothing happens.
127 void initialize (const crs_matrix_type& /* unused */) {
128#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
129 reset();
130 transpose_ = conjugate_ = false;
131#endif
132 }
133
134 void compute (const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
135 (void)T_in;
136 (void)out;
137#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
138 using Teuchos::ArrayRCP;
139
140 auto rowptr = T_in.getLocalRowPtrsHost();
141 auto colidx = T_in.getLocalIndicesHost();
142 auto val = T_in.getLocalValuesHost(Tpetra::Access::ReadOnly);
143 Kokkos::fence();
144
145 Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
146 HTST::make_CrsMatrix(rowptr.size() - 1,
147 rowptr.data(), colidx.data(),
148 // For std/Kokkos::complex.
149 reinterpret_cast<const scalar_type*>(val.data()),
150 transpose_, conjugate_),
151 HtsCrsMatrixDeleter());
152
153 if (Teuchos::nonnull(Timpl_)) {
154 // Reuse the nonzero pattern.
155 HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
156 } else {
157 // Build from scratch.
158 if (T_in.getCrsGraph().is_null()) {
159 if (Teuchos::nonnull(out))
160 *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
161 return;
162 }
163 if ( ! T_in.getCrsGraph()->isSorted()) {
164 if (Teuchos::nonnull(out))
165 *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
166 return;
167 }
168 if ( ! T_in.isStorageOptimized()) {
169 if (Teuchos::nonnull(out))
170 *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
171 return;
172 }
173
174 typename HTST::PreprocessArgs args;
175 args.T = T_hts.get();
176 args.max_nrhs = 1;
177#ifdef _OPENMP
178 args.nthreads = omp_get_max_threads();
179#else
180 args.nthreads = 1;
181#endif
182 args.save_for_reprocess = true;
183 typename HTST::Options opts;
184 opts.levelset_block_size = levelset_block_size_;
185 args.options = &opts;
186
187 try {
188 Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
189 } catch (const std::exception& e) {
190 if (Teuchos::nonnull(out))
191 *out << "HTS preprocess threw: " << e.what() << "\n";
192 }
193 }
194#endif
195 }
196
197 // HTS may not be able to handle a matrix, so query whether compute()
198 // succeeded.
199 bool isComputed () {
200#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
201 return Teuchos::nonnull(Timpl_);
202#else
203 return false;
204#endif
205 }
206
207 // Y := beta * Y + alpha * (M * X)
208 void localApply (const MV& X, MV& Y,
209 const Teuchos::ETransp /* mode */,
210 const scalar_type& alpha, const scalar_type& beta) const {
211 (void)X;
212 (void)Y;
213 (void)alpha;
214 (void)beta;
215#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
216 const auto& X_view = X.getLocalViewHost (Tpetra::Access::ReadOnly);
217 const auto& Y_view = Y.getLocalViewHost (Tpetra::Access::ReadWrite);
218
219 // Only does something if #rhs > current capacity.
220 HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
221 // Switch alpha and beta because of HTS's opposite convention.
222 HTST::solve_omp(Timpl_.get(),
223 // For std/Kokkos::complex.
224 reinterpret_cast<const scalar_type*>(X_view.data()),
225 X_view.extent(1),
226 // For std/Kokkos::complex.
227 reinterpret_cast<scalar_type*>(Y_view.data()),
228 beta, alpha);
229#endif
230 }
231
232private:
233#ifdef HAVE_IFPACK2_SHYLU_NODEHTS
234 typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
235 typedef typename HTST::Impl TImpl;
236 typedef typename HTST::CrsMatrix HtsCrsMatrix;
237
238 struct TImplDeleter {
239 void free (TImpl* impl) {
240 HTST::delete_Impl(impl);
241 }
242 };
243
244 struct HtsCrsMatrixDeleter {
245 void free (HtsCrsMatrix* T) {
246 HTST::delete_CrsMatrix(T);
247 }
248 };
249
250 Teuchos::RCP<TImpl> Timpl_;
251 bool transpose_, conjugate_;
252 int levelset_block_size_;
253#endif
254};
255
256template<class MatrixType>
258LocalSparseTriangularSolver (const Teuchos::RCP<const row_matrix_type>& A) :
259 A_ (A)
260{
261 initializeState();
262
263 if (! A.is_null ()) {
264 Teuchos::RCP<const crs_matrix_type> A_crs =
265 Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
266 TEUCHOS_TEST_FOR_EXCEPTION
267 (A_crs.is_null (), std::invalid_argument,
268 "Ifpack2::LocalSparseTriangularSolver constructor: "
269 "The input matrix A is not a Tpetra::CrsMatrix.");
270 A_crs_ = A_crs;
271 }
272}
273
274template<class MatrixType>
276LocalSparseTriangularSolver (const Teuchos::RCP<const row_matrix_type>& A,
277 const Teuchos::RCP<Teuchos::FancyOStream>& out) :
278 A_ (A),
279 out_ (out)
280{
281 initializeState();
282 if (! out_.is_null ()) {
283 *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
284 << std::endl;
285 }
286
287 if (! A.is_null ()) {
288 Teuchos::RCP<const crs_matrix_type> A_crs =
289 Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
290 TEUCHOS_TEST_FOR_EXCEPTION
291 (A_crs.is_null (), std::invalid_argument,
292 "Ifpack2::LocalSparseTriangularSolver constructor: "
293 "The input matrix A is not a Tpetra::CrsMatrix.");
294 A_crs_ = A_crs;
295 }
296}
297
298template<class MatrixType>
304
305template<class MatrixType>
307LocalSparseTriangularSolver (const bool /* unused */, const Teuchos::RCP<Teuchos::FancyOStream>& out) :
308 out_ (out)
309{
310 initializeState();
311 if (! out_.is_null ()) {
312 *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
313 << std::endl;
314 }
315}
316
317template<class MatrixType>
318void LocalSparseTriangularSolver<MatrixType>::initializeState ()
319{
320 isInitialized_ = false;
321 isComputed_ = false;
322 reverseStorage_ = false;
323 isInternallyChanged_ = false;
324 numInitialize_ = 0;
325 numCompute_ = 0;
326 numApply_ = 0;
327 initializeTime_ = 0.0;
328 computeTime_ = 0.0;
329 applyTime_ = 0.0;
330 isKokkosKernelsSptrsv_ = false;
331 isKokkosKernelsStream_ = false;
332 num_streams_ = 0;
333 uplo_ = "N";
334 diag_ = "N";
335}
336
337template<class MatrixType>
340{
341 if (!isKokkosKernelsStream_) {
342 if (Teuchos::nonnull (kh_)) {
343 kh_->destroy_sptrsv_handle();
344 }
345 }
346 else {
347 for (int i = 0; i < num_streams_; i++) {
348 if (Teuchos::nonnull (kh_v_[i])) {
349 kh_v_[i]->destroy_sptrsv_handle();
350 }
351 }
352 }
353}
354
355template<class MatrixType>
356void
358setParameters (const Teuchos::ParameterList& pl)
359{
360 using Teuchos::RCP;
361 using Teuchos::ParameterList;
362 using Teuchos::Array;
363
364 Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
365 do {
366 static const char typeName[] = "trisolver: type";
367
368 if ( ! pl.isType<std::string>(typeName)) break;
369
370 // Map std::string <-> TrisolverType::Enum.
371 Array<std::string> trisolverTypeStrs;
372 Array<Details::TrisolverType::Enum> trisolverTypeEnums;
373 Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
374 Teuchos::StringToIntegralParameterEntryValidator<Details::TrisolverType::Enum>
375 s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName, false);
376
377 trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
378 } while (0);
379
380 if (trisolverType == Details::TrisolverType::HTS) {
381 htsImpl_ = Teuchos::rcp (new HtsImpl ());
382 htsImpl_->setParameters (pl);
383 }
384
385 if (trisolverType == Details::TrisolverType::KSPTRSV) {
386 this->isKokkosKernelsSptrsv_ = true;
387 }
388 else {
389 this->isKokkosKernelsSptrsv_ = false;
390 }
391
392 if (pl.isParameter("trisolver: reverse U"))
393 reverseStorage_ = pl.get<bool>("trisolver: reverse U");
394
395 TEUCHOS_TEST_FOR_EXCEPTION
396 (reverseStorage_ && (trisolverType == Details::TrisolverType::HTS || trisolverType == Details::TrisolverType::KSPTRSV),
397 std::logic_error, "Ifpack2::LocalSparseTriangularSolver::setParameters: "
398 "You are not allowed to enable both HTS or KSPTRSV and the \"trisolver: reverse U\" "
399 "options. See GitHub issue #2647.");
400}
401
402template<class MatrixType>
403void
405initialize ()
406{
407 using Tpetra::Details::determineLocalTriangularStructure;
408
409 using local_matrix_type = typename crs_matrix_type::local_matrix_device_type;
410 using LO = local_ordinal_type;
411
412 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
413 if (! out_.is_null ()) {
414 *out_ << ">>> DEBUG " << prefix << std::endl;
415 }
416
417 if (!isKokkosKernelsStream_) {
418 TEUCHOS_TEST_FOR_EXCEPTION
419 (A_.is_null (), std::runtime_error, prefix << "You must call "
420 "setMatrix() with a nonnull input matrix before you may call "
421 "initialize() or compute().");
422 if (A_crs_.is_null ()) {
423 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
424 TEUCHOS_TEST_FOR_EXCEPTION
425 (A_crs.get () == nullptr, std::invalid_argument,
426 prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
427 A_crs_ = A_crs;
428 }
429 auto G = A_crs_->getGraph ();
430 TEUCHOS_TEST_FOR_EXCEPTION
431 (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
432 "but A_crs_'s RowGraph G is null. "
433 "Please report this bug to the Ifpack2 developers.");
434 // At this point, the graph MUST be fillComplete. The "initialize"
435 // (symbolic) part of setup only depends on the graph structure, so
436 // the matrix itself need not be fill complete.
437 TEUCHOS_TEST_FOR_EXCEPTION
438 (! G->isFillComplete (), std::runtime_error, "If you call this method, "
439 "the matrix's graph must be fill complete. It is not.");
440
441 // mfh 30 Apr 2018: See GitHub Issue #2658.
442 constexpr bool ignoreMapsForTriStructure = true;
443 auto lclTriStructure = [&] {
444 auto lclMatrix = A_crs_->getLocalMatrixDevice ();
445 auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
446 auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
447 auto lclTriStruct =
448 determineLocalTriangularStructure (lclMatrix.graph,
449 lclRowMap,
450 lclColMap,
451 ignoreMapsForTriStructure);
452 const LO lclNumRows = lclRowMap.getLocalNumElements ();
453 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
454 this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
455 (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
456 return lclTriStruct;
457 } ();
458
459 if (reverseStorage_ && lclTriStructure.couldBeUpperTriangular &&
460 htsImpl_.is_null ()) {
461 // Reverse the storage for an upper triangular matrix
462 auto Alocal = A_crs_->getLocalMatrixDevice();
463 auto ptr = Alocal.graph.row_map;
464 auto ind = Alocal.graph.entries;
465 auto val = Alocal.values;
466
467 auto numRows = Alocal.numRows();
468 auto numCols = Alocal.numCols();
469 auto numNnz = Alocal.nnz();
470
471 typename decltype(ptr)::non_const_type newptr ("ptr", ptr.extent (0));
472 typename decltype(ind)::non_const_type newind ("ind", ind.extent (0));
473 decltype(val) newval ("val", val.extent (0));
474
475 // FIXME: The code below assumes UVM
476 typename crs_matrix_type::execution_space().fence();
477 newptr(0) = 0;
478 for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
479 auto A_r = Alocal.row(numRows-1 - row);
480
481 auto numEnt = A_r.length;
482 for (local_ordinal_type k = 0; k < numEnt; ++k) {
483 newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
484 newval(rowStart + k) = A_r.value (numEnt-1 - k);
485 }
486 rowStart += numEnt;
487 newptr(row+1) = rowStart;
488 }
489 typename crs_matrix_type::execution_space().fence();
490
491 // Reverse maps
492 Teuchos::RCP<map_type> newRowMap, newColMap;
493 {
494 // Reverse row map
495 auto rowMap = A_->getRowMap();
496 auto numElems = rowMap->getLocalNumElements();
497 auto rowElems = rowMap->getLocalElementList();
498
499 Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
500 for (size_t i = 0; i < numElems; i++)
501 newRowElems[i] = rowElems[numElems-1 - i];
502
503 newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
504 }
505 {
506 // Reverse column map
507 auto colMap = A_->getColMap();
508 auto numElems = colMap->getLocalNumElements();
509 auto colElems = colMap->getLocalElementList();
510
511 Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
512 for (size_t i = 0; i < numElems; i++)
513 newColElems[i] = colElems[numElems-1 - i];
514
515 newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
516 }
517
518 // Construct new matrix
519 local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
520
521 A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
522
523 isInternallyChanged_ = true;
524
525 // FIXME (mfh 18 Apr 2019) Recomputing this is unnecessary, but I
526 // didn't want to break any invariants, especially considering
527 // that this branch is likely poorly tested.
528 auto newLclTriStructure =
529 determineLocalTriangularStructure (newLocalMatrix.graph,
530 newRowMap->getLocalMap (),
531 newColMap->getLocalMap (),
532 ignoreMapsForTriStructure);
533 const LO newLclNumRows = newRowMap->getLocalNumElements ();
534 this->diag_ = (newLclTriStructure.diagCount < newLclNumRows) ? "U" : "N";
535 this->uplo_ = newLclTriStructure.couldBeLowerTriangular ? "L" :
536 (newLclTriStructure.couldBeUpperTriangular ? "U" : "N");
537 }
538 }
539 else {
540 for (int i = 0; i < num_streams_; i++) {
541 TEUCHOS_TEST_FOR_EXCEPTION
542 (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
543 "setMatrix() with a nonnull input matrix before you may call "
544 "initialize() or compute().");
545 auto G = A_crs_v_[i]->getGraph ();
546 TEUCHOS_TEST_FOR_EXCEPTION
547 (G.is_null (), std::logic_error, prefix << "A_crs_ are nonnull, "
548 "but A_crs_'s RowGraph G is null. "
549 "Please report this bug to the Ifpack2 developers.");
550 // At this point, the graph MUST be fillComplete. The "initialize"
551 // (symbolic) part of setup only depends on the graph structure, so
552 // the matrix itself need not be fill complete.
553 TEUCHOS_TEST_FOR_EXCEPTION
554 (! G->isFillComplete (), std::runtime_error, "If you call this method, "
555 "the matrix's graph must be fill complete. It is not.");
556
557 // mfh 30 Apr 2018: See GitHub Issue #2658.
558 constexpr bool ignoreMapsForTriStructure = true;
559 std::string prev_uplo_ = this->uplo_;
560 std::string prev_diag_ = this->diag_;
561 auto lclMatrix = A_crs_v_[i]->getLocalMatrixDevice ();
562 auto lclRowMap = A_crs_v_[i]->getRowMap ()->getLocalMap ();
563 auto lclColMap = A_crs_v_[i]->getColMap ()->getLocalMap ();
564 auto lclTriStruct =
565 determineLocalTriangularStructure (lclMatrix.graph,
566 lclRowMap,
567 lclColMap,
568 ignoreMapsForTriStructure);
569 const LO lclNumRows = lclRowMap.getLocalNumElements ();
570 this->diag_ = (lclTriStruct.diagCount < lclNumRows) ? "U" : "N";
571 this->uplo_ = lclTriStruct.couldBeLowerTriangular ? "L" :
572 (lclTriStruct.couldBeUpperTriangular ? "U" : "N");
573 if (i > 0) {
574 TEUCHOS_TEST_FOR_EXCEPTION
575 ((this->diag_ != prev_diag_) || (this->uplo_ != prev_uplo_),
576 std::logic_error, prefix << "A_crs_'s structures in streams "
577 "are different. Please report this bug to the Ifpack2 developers.");
578 }
579 }
580 }
581
582 if (Teuchos::nonnull (htsImpl_))
583 {
584 htsImpl_->initialize (*A_crs_);
585 isInternallyChanged_ = true;
586 }
587
588 const bool ksptrsv_valid_uplo = (this->uplo_ != "N");
589 kh_v_nonnull_ = false;
590 if (this->isKokkosKernelsSptrsv_ && ksptrsv_valid_uplo && this->diag_ != "U")
591 {
592 if (!isKokkosKernelsStream_) {
593 kh_ = Teuchos::rcp (new k_handle());
594 const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
595
596 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_, true);
597 auto Alocal = A_crs->getLocalMatrixDevice();
598 auto ptr = Alocal.graph.row_map;
599 auto ind = Alocal.graph.entries;
600 auto val = Alocal.values;
601
602 auto numRows = Alocal.numRows();
603 kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri);
604 KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
605 } else {
606 kh_v_ = std::vector< Teuchos::RCP<k_handle> >(num_streams_);
607 for (int i = 0; i < num_streams_; i++) {
608 kh_v_[i] = Teuchos::rcp (new k_handle ());
609 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i], true);
610 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
611 auto ptr_i = Alocal_i.graph.row_map;
612 auto ind_i = Alocal_i.graph.entries;
613 auto val_i = Alocal_i.values;
614
615 auto numRows_i = Alocal_i.numRows();
616
617 const bool is_lower_tri = (this->uplo_ == "L") ? true : false;
618 kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri);
619 KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
620 }
621 kh_v_nonnull_ = true;
622 }
623 }
624
625 isInitialized_ = true;
626 ++numInitialize_;
627}
628
629template<class MatrixType>
630KokkosSparse::Experimental::SPTRSVAlgorithm
631LocalSparseTriangularSolver<MatrixType>::kokkosKernelsAlgorithm() const
632{
633#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA)
634 // CuSparse only supports int type ordinals
635 // and scalar types of float, double, float complex and double complex
636 if constexpr (std::is_same<Kokkos::Cuda, HandleExecSpace>::value &&
637 std::is_same<int, local_ordinal_type>::value &&
638 (std::is_same<scalar_type, float>::value ||
639 std::is_same<scalar_type, double>::value ||
640 std::is_same<scalar_type, Kokkos::complex<float>>::value ||
641 std::is_same<scalar_type, Kokkos::complex<double>>::value))
642 {
643 return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE;
644 }
645#endif
646 return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
647}
648
649template<class MatrixType>
650void
652compute ()
653{
654 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
655 if (! out_.is_null ()) {
656 *out_ << ">>> DEBUG " << prefix << std::endl;
657 }
658
659 if (!isKokkosKernelsStream_) {
660 TEUCHOS_TEST_FOR_EXCEPTION
661 (A_.is_null (), std::runtime_error, prefix << "You must call "
662 "setMatrix() with a nonnull input matrix before you may call "
663 "initialize() or compute().");
664 TEUCHOS_TEST_FOR_EXCEPTION
665 (A_crs_.is_null (), std::logic_error, prefix << "A_ is nonnull, but "
666 "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
667 // At this point, the matrix MUST be fillComplete.
668 TEUCHOS_TEST_FOR_EXCEPTION
669 (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
670 "method, the matrix must be fill complete. It is not.");
671 }
672 else {
673 for(int i = 0; i < num_streams_; i++) {
674 TEUCHOS_TEST_FOR_EXCEPTION
675 (A_crs_v_[i].is_null (), std::runtime_error, prefix << "You must call "
676 "setMatrices() with a nonnull input matrix before you may call "
677 "initialize() or compute().");
678 // At this point, the matrix MUST be fillComplete.
679 TEUCHOS_TEST_FOR_EXCEPTION
680 (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
681 "method, the matrix must be fill complete. It is not.");
682 }
683 }
684
685 if (! isInitialized_) {
686 initialize ();
687 }
688 TEUCHOS_TEST_FOR_EXCEPTION
689 (! isInitialized_, std::logic_error, prefix << "initialize() should have "
690 "been called by this point, but isInitialized_ is false. "
691 "Please report this bug to the Ifpack2 developers.");
692
693// NOTE (Nov-09-2022):
694// For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute
695// even when matrix values are changed with the same sparsity pattern.
696// For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the
697// values that is substantially faster.
698// This would all be much better handled via a KokkosSparse::Experimental::sptrsv_numeric(...)
699// that could hide the Cuda implementation details.
700#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030)
701 if constexpr ( std::is_same_v<typename crs_matrix_type::execution_space, Kokkos::Cuda> )
702 {
703 if (this->isKokkosKernelsSptrsv_) {
704 if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) {
705 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_, true);
706 auto Alocal = A_crs->getLocalMatrixDevice();
707 auto val = Alocal.values;
708 #if (CUSPARSE_VERSION >= 12100)
709 auto *sptrsv_handle = kh_->get_sptrsv_handle();
710 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
711 cusparseSpSV_updateMatrix(cusparse_handle->handle,
712 cusparse_handle->spsvDescr,
713 val.data(),
714 CUSPARSE_SPSV_UPDATE_GENERAL);
715 #else
716 auto ptr = Alocal.graph.row_map;
717 auto ind = Alocal.graph.entries;
718 KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val);
719 #endif
720 } else if (kh_v_nonnull_) {
721 for (int i = 0; i < num_streams_; i++) {
722 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i], true);
723 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
724 auto val_i = Alocal_i.values;
725 #if (CUSPARSE_VERSION >= 12100)
726 auto *sptrsv_handle = kh_v_[i]->get_sptrsv_handle();
727 auto cusparse_handle = sptrsv_handle->get_cuSparseHandle();
728 IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(
729 cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream()));
730 cusparseSpSV_updateMatrix(cusparse_handle->handle,
731 cusparse_handle->spsvDescr,
732 val_i.data(),
733 CUSPARSE_SPSV_UPDATE_GENERAL);
734 #else
735 auto ptr_i = Alocal_i.graph.row_map;
736 auto ind_i = Alocal_i.graph.entries;
737 KokkosSparse::Experimental::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i);
738 #endif
739 }
740 }
741 }
742 }
743#endif
744
745 if (! isComputed_) {//Only compute if not computed before
746 if (Teuchos::nonnull (htsImpl_))
747 htsImpl_->compute (*A_crs_, out_);
748
749 isComputed_ = true;
750 ++numCompute_;
751 }
752}
753
754template<class MatrixType>
756apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
758 Tpetra::MultiVector<scalar_type, local_ordinal_type,
760 Teuchos::ETransp mode,
761 scalar_type alpha,
762 scalar_type beta) const
763{
764 using Teuchos::RCP;
765 using Teuchos::rcp;
766 using Teuchos::rcpFromRef;
767 typedef scalar_type ST;
768 typedef Teuchos::ScalarTraits<ST> STS;
769 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
770
771 if (! out_.is_null ()) {
772 *out_ << ">>> DEBUG " << prefix;
773 if (!isKokkosKernelsStream_) {
774 if (A_crs_.is_null ()) {
775 *out_ << "A_crs_ is null!" << std::endl;
776 }
777 else {
778 Teuchos::RCP<const crs_matrix_type> A_crs =
779 Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
780 const std::string uplo = this->uplo_;
781 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
782 (mode == Teuchos::TRANS ? "T" : "N");
783 const std::string diag = this->diag_;
784 *out_ << "uplo=\"" << uplo
785 << "\", trans=\"" << trans
786 << "\", diag=\"" << diag << "\"" << std::endl;
787 }
788 }
789 else {
790 for (int i = 0; i < num_streams_; i++) {
791 if (A_crs_v_[i].is_null ()) {
792 *out_ << "A_crs_v_[" << i << "]" << " is null!" << std::endl;
793 }
794 else {
795 const std::string uplo = this->uplo_;
796 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
797 (mode == Teuchos::TRANS ? "T" : "N");
798 const std::string diag = this->diag_;
799 *out_ << "A_crs_v_[" << i << "]: "
800 << "uplo=\"" << uplo
801 << "\", trans=\"" << trans
802 << "\", diag=\"" << diag << "\"" << std::endl;
803 }
804 }
805 }
806 }
807
808 TEUCHOS_TEST_FOR_EXCEPTION
809 (! isComputed (), std::runtime_error, prefix << "If compute() has not yet "
810 "been called, or if you have changed the matrix via setMatrix(), you must "
811 "call compute() before you may call this method.");
812 // If isComputed() is true, it's impossible for the matrix to be
813 // null, or for it not to be a Tpetra::CrsMatrix.
814 if (!isKokkosKernelsStream_) {
815 TEUCHOS_TEST_FOR_EXCEPTION
816 (A_.is_null (), std::logic_error, prefix << "A_ is null. "
817 "Please report this bug to the Ifpack2 developers.");
818 TEUCHOS_TEST_FOR_EXCEPTION
819 (A_crs_.is_null (), std::logic_error, prefix << "A_crs_ is null. "
820 "Please report this bug to the Ifpack2 developers.");
821 // However, it _is_ possible that the user called resumeFill() on
822 // the matrix, after calling compute(). This is NOT allowed.
823 TEUCHOS_TEST_FOR_EXCEPTION
824 (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
825 "method, the matrix must be fill complete. It is not. This means that "
826 " you must have called resumeFill() on the matrix before calling apply(). "
827 "This is NOT allowed. Note that this class may use the matrix's data in "
828 "place without copying it. Thus, you cannot change the matrix and expect "
829 "the solver to stay the same. If you have changed the matrix, first call "
830 "fillComplete() on it, then call compute() on this object, before you call"
831 " apply(). You do NOT need to call setMatrix, as long as the matrix "
832 "itself (that is, its address in memory) is the same.");
833 }
834 else {
835 for (int i = 0; i < num_streams_; i++) {
836 TEUCHOS_TEST_FOR_EXCEPTION
837 (A_crs_v_[i].is_null (), std::logic_error, prefix << "A_crs_ is null. "
838 "Please report this bug to the Ifpack2 developers.");
839 // However, it _is_ possible that the user called resumeFill() on
840 // the matrix, after calling compute(). This is NOT allowed.
841 TEUCHOS_TEST_FOR_EXCEPTION
842 (! A_crs_v_[i]->isFillComplete (), std::runtime_error, "If you call this "
843 "method, the matrix must be fill complete. It is not. This means that "
844 " you must have called resumeFill() on the matrix before calling apply(). "
845 "This is NOT allowed. Note that this class may use the matrix's data in "
846 "place without copying it. Thus, you cannot change the matrix and expect "
847 "the solver to stay the same. If you have changed the matrix, first call "
848 "fillComplete() on it, then call compute() on this object, before you call"
849 " apply(). You do NOT need to call setMatrix, as long as the matrix "
850 "itself (that is, its address in memory) is the same.");
851 }
852 }
853
854 RCP<const MV> X_cur;
855 RCP<MV> Y_cur;
856
857 if (!isKokkosKernelsStream_) {
858 auto G = A_crs_->getGraph ();
859 TEUCHOS_TEST_FOR_EXCEPTION
860 (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
861 "but A_crs_'s RowGraph G is null. "
862 "Please report this bug to the Ifpack2 developers.");
863 auto importer = G->getImporter ();
864 auto exporter = G->getExporter ();
865
866 if (! importer.is_null ()) {
867 if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
868 X_colMap_ = rcp (new MV (importer->getTargetMap (), X.getNumVectors ()));
869 }
870 else {
871 X_colMap_->putScalar (STS::zero ());
872 }
873 // See discussion of Github Issue #672 for why the Import needs to
874 // use the ZERO CombineMode. The case where the Export is
875 // nontrivial is likely never exercised.
876 X_colMap_->doImport (X, *importer, Tpetra::ZERO);
877 }
878 X_cur = importer.is_null () ? rcpFromRef (X) :
879 Teuchos::rcp_const_cast<const MV> (X_colMap_);
880
881 if (! exporter.is_null ()) {
882 if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
883 Y_rowMap_ = rcp (new MV (exporter->getSourceMap (), Y.getNumVectors ()));
884 }
885 else {
886 Y_rowMap_->putScalar (STS::zero ());
887 }
888 Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
889 }
890 Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
891 }
892 else {
893 // Currently assume X and Y are local vectors (same sizes as A_crs_).
894 // Should do a better job here!!!
895 X_cur = rcpFromRef (X);
896 Y_cur = rcpFromRef (Y);
897 }
898
899 localApply (*X_cur, *Y_cur, mode, alpha, beta);
900
901 if (!isKokkosKernelsStream_) {
902 auto G = A_crs_->getGraph ();
903 auto exporter = G->getExporter ();
904 if (! exporter.is_null ()) {
905 Y.putScalar (STS::zero ());
906 Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
907 }
908 }
909 ++numApply_;
910}
911
912template<class MatrixType>
913void
915localTriangularSolve (const MV& Y,
916 MV& X,
917 const Teuchos::ETransp mode) const
918{
919 using Teuchos::CONJ_TRANS;
920 using Teuchos::NO_TRANS;
921 using Teuchos::TRANS;
922 const char tfecfFuncName[] = "localTriangularSolve: ";
923
924 if (!isKokkosKernelsStream_)
925 {
926 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
927 (! A_crs_->isFillComplete (), std::runtime_error,
928 "The matrix is not fill complete.");
929 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
930 ( A_crs_->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
931 "The matrix is neither upper triangular or lower triangular. "
932 "You may only call this method if the matrix is triangular. "
933 "Remember that this is a local (per MPI process) property, and that "
934 "Tpetra only knows how to do a local (per process) triangular solve.");
935 }
936 else
937 {
938 for (int i = 0; i < num_streams_; i++) {
939 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
940 (! A_crs_v_[i]->isFillComplete (), std::runtime_error,
941 "The matrix is not fill complete.");
942 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
943 ( A_crs_v_[i]->getLocalNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
944 "The matrix is neither upper triangular or lower triangular. "
945 "You may only call this method if the matrix is triangular. "
946 "Remember that this is a local (per MPI process) property, and that "
947 "Tpetra only knows how to do a local (per process) triangular solve.");
948 }
949 }
950 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
951 (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
952 "X and Y must be constant stride.");
953 using STS = Teuchos::ScalarTraits<scalar_type>;
954 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
955 (STS::isComplex && mode == TRANS, std::logic_error, "This method does "
956 "not currently support non-conjugated transposed solve (mode == "
957 "Teuchos::TRANS) for complex scalar types.");
958
959 const std::string uplo = this->uplo_;
960 const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
961 (mode == Teuchos::TRANS ? "T" : "N");
962 const size_t numVecs = std::min (X.getNumVectors (), Y.getNumVectors ());
963
964 if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_ && trans == "N")
965 {
966 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (this->A_);
967 auto A_lclk = A_crs->getLocalMatrixDevice ();
968 auto ptr = A_lclk.graph.row_map;
969 auto ind = A_lclk.graph.entries;
970 auto val = A_lclk.values;
971
972 for (size_t j = 0; j < numVecs; ++j) {
973 auto X_j = X.getVectorNonConst (j);
974 auto Y_j = Y.getVector (j);
975 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
976 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
977 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
978 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
979 KokkosSparse::Experimental::sptrsv_solve(kh_.getRawPtr(), ptr, ind, val, Y_lcl_1d, X_lcl_1d);
980 // TODO is this fence needed...
981 typename k_handle::HandleExecSpace().fence();
982 }
983 } // End using regular interface of Kokkos Kernels Sptrsv
984 else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_ && trans == "N")
985 {
986 std::vector<lno_row_view_t> ptr_v(num_streams_);
987 std::vector<lno_nonzero_view_t> ind_v(num_streams_);
988 std::vector<scalar_nonzero_view_t> val_v(num_streams_);
989 std::vector<k_handle *> KernelHandle_rawptr_v_(num_streams_);
990 for (size_t j = 0; j < numVecs; ++j) {
991 auto X_j = X.getVectorNonConst (j);
992 auto Y_j = Y.getVector (j);
993 auto X_lcl = X_j->getLocalViewDevice (Tpetra::Access::ReadWrite);
994 auto Y_lcl = Y_j->getLocalViewDevice (Tpetra::Access::ReadOnly);
995 auto X_lcl_1d = Kokkos::subview (X_lcl, Kokkos::ALL (), 0);
996 auto Y_lcl_1d = Kokkos::subview (Y_lcl, Kokkos::ALL (), 0);
997 std::vector<decltype(X_lcl_1d)> x_v(num_streams_);
998 std::vector<decltype(Y_lcl_1d)> y_v(num_streams_);
999 local_ordinal_type stream_begin = 0;
1000 local_ordinal_type stream_end;
1001 for (int i = 0; i < num_streams_; i++) {
1002 auto A_crs_i = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_crs_v_[i]);
1003 auto Alocal_i = A_crs_i->getLocalMatrixDevice();
1004 ptr_v[i] = Alocal_i.graph.row_map;
1005 ind_v[i] = Alocal_i.graph.entries;
1006 val_v[i] = Alocal_i.values;
1007 stream_end = stream_begin + Alocal_i.numRows();
1008 x_v[i] = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1009 y_v[i] = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);;
1010 KernelHandle_rawptr_v_[i] = kh_v_[i].getRawPtr();
1011 stream_begin = stream_end;
1012 }
1013 Kokkos::fence();
1014 KokkosSparse::Experimental::sptrsv_solve_streams( exec_space_instances_, KernelHandle_rawptr_v_,
1015 ptr_v, ind_v, val_v, y_v, x_v );
1016 Kokkos::fence();
1017 }
1018 } // End using stream interface of Kokkos Kernels Sptrsv
1019 else
1020 {
1021 const std::string diag = this->diag_;
1022 // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
1023 // sequential, host-only code. See
1024 // https://github.com/kokkos/kokkos-kernels/issues/48.
1025
1026 auto A_lcl = this->A_crs_->getLocalMatrixHost ();
1027
1028 if (X.isConstantStride () && Y.isConstantStride ()) {
1029 auto X_lcl = X.getLocalViewHost (Tpetra::Access::ReadWrite);
1030 auto Y_lcl = Y.getLocalViewHost (Tpetra::Access::ReadOnly);
1031 KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
1032 A_lcl, Y_lcl, X_lcl);
1033 }
1034 else {
1035 for (size_t j = 0; j < numVecs; ++j) {
1036 auto X_j = X.getVectorNonConst (j);
1037 auto Y_j = Y.getVector (j);
1038 auto X_lcl = X_j->getLocalViewHost (Tpetra::Access::ReadWrite);
1039 auto Y_lcl = Y_j->getLocalViewHost (Tpetra::Access::ReadOnly);
1040 KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
1041 diag.c_str (), A_lcl, Y_lcl, X_lcl);
1042 }
1043 }
1044 }
1045}
1046
1047template<class MatrixType>
1048void
1050localApply (const MV& X,
1051 MV& Y,
1052 const Teuchos::ETransp mode,
1053 const scalar_type& alpha,
1054 const scalar_type& beta) const
1055{
1056 if (mode == Teuchos::NO_TRANS && Teuchos::nonnull (htsImpl_) &&
1057 htsImpl_->isComputed ()) {
1058 htsImpl_->localApply (X, Y, mode, alpha, beta);
1059 return;
1060 }
1061
1062 using Teuchos::RCP;
1063 typedef scalar_type ST;
1064 typedef Teuchos::ScalarTraits<ST> STS;
1065
1066 if (beta == STS::zero ()) {
1067 if (alpha == STS::zero ()) {
1068 Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
1069 }
1070 else { // alpha != 0
1071 this->localTriangularSolve (X, Y, mode);
1072 if (alpha != STS::one ()) {
1073 Y.scale (alpha);
1074 }
1075 }
1076 }
1077 else { // beta != 0
1078 if (alpha == STS::zero ()) {
1079 Y.scale (beta); // Y := beta * Y
1080 }
1081 else { // alpha != 0
1082 MV Y_tmp (Y, Teuchos::Copy);
1083 this->localTriangularSolve (X, Y_tmp, mode); // Y_tmp := M * X
1084 Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
1085 }
1086 }
1087}
1088
1089
1090template <class MatrixType>
1091int
1093getNumInitialize () const {
1094 return numInitialize_;
1095}
1096
1097template <class MatrixType>
1098int
1100getNumCompute () const {
1101 return numCompute_;
1102}
1103
1104template <class MatrixType>
1105int
1107getNumApply () const {
1108 return numApply_;
1109}
1110
1111template <class MatrixType>
1112double
1114getInitializeTime () const {
1115 return initializeTime_;
1116}
1117
1118template<class MatrixType>
1119double
1121getComputeTime () const {
1122 return computeTime_;
1123}
1124
1125template<class MatrixType>
1126double
1128getApplyTime() const {
1129 return applyTime_;
1130}
1131
1132template <class MatrixType>
1133std::string
1135description () const
1136{
1137 std::ostringstream os;
1138
1139 // Output is a valid YAML dictionary in flow style. If you don't
1140 // like everything on a single line, you should call describe()
1141 // instead.
1142 os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
1143 if (this->getObjectLabel () != "") {
1144 os << "Label: \"" << this->getObjectLabel () << "\", ";
1145 }
1146 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1147 << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1148
1149 if(isKokkosKernelsSptrsv_) os << "KK-SPTRSV, ";
1150 if(isKokkosKernelsStream_) os << "KK-SolveStream, ";
1151
1152 if (A_.is_null ()) {
1153 os << "Matrix: null";
1154 }
1155 else {
1156 os << "Matrix dimensions: ["
1157 << A_->getGlobalNumRows () << ", "
1158 << A_->getGlobalNumCols () << "]"
1159 << ", Number of nonzeros: " << A_->getGlobalNumEntries();
1160 }
1161
1162 if (Teuchos::nonnull (htsImpl_))
1163 os << ", HTS computed: " << (htsImpl_->isComputed () ? "true" : "false");
1164 os << "}";
1165 return os.str ();
1166}
1167
1168template <class MatrixType>
1170describe (Teuchos::FancyOStream& out,
1171 const Teuchos::EVerbosityLevel verbLevel) const
1172{
1173 using std::endl;
1174 // Default verbosity level is VERB_LOW, which prints only on Process
1175 // 0 of the matrix's communicator.
1176 const Teuchos::EVerbosityLevel vl
1177 = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
1178
1179 if (vl != Teuchos::VERB_NONE) {
1180 // Print only on Process 0 in the matrix's communicator. If the
1181 // matrix is null, though, we have to get the communicator from
1182 // somewhere, so we ask Tpetra for its default communicator. If
1183 // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
1184 auto comm = A_.is_null () ?
1185 Tpetra::getDefaultComm () :
1186 A_->getComm ();
1187
1188 // Users aren't supposed to do anything with the matrix on
1189 // processes where its communicator is null.
1190 if (! comm.is_null () && comm->getRank () == 0) {
1191 // By convention, describe() should always begin with a tab.
1192 Teuchos::OSTab tab0 (out);
1193 // Output is in YAML format. We have to escape the class name,
1194 // because it has a colon.
1195 out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
1196 Teuchos::OSTab tab1 (out);
1197 out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name () << endl
1198 << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name () << endl
1199 << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name () << endl
1200 << "Node: " << Teuchos::TypeNameTraits<node_type>::name () << endl;
1201 }
1202 }
1203}
1204
1205template <class MatrixType>
1206Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
1208getDomainMap () const
1209{
1210 TEUCHOS_TEST_FOR_EXCEPTION
1211 (A_.is_null (), std::runtime_error,
1212 "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
1213 "The matrix is null. Please call setMatrix() with a nonnull input "
1214 "before calling this method.");
1215 return A_->getDomainMap ();
1216}
1217
1218template <class MatrixType>
1219Teuchos::RCP<const typename LocalSparseTriangularSolver<MatrixType>::map_type>
1221getRangeMap () const
1222{
1223 TEUCHOS_TEST_FOR_EXCEPTION
1224 (A_.is_null (), std::runtime_error,
1225 "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
1226 "The matrix is null. Please call setMatrix() with a nonnull input "
1227 "before calling this method.");
1228 return A_->getRangeMap ();
1229}
1230
1231template<class MatrixType>
1233setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
1234{
1235 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
1236
1237 // If the pointer didn't change, do nothing. This is reasonable
1238 // because users are supposed to call this method with the same
1239 // object over all participating processes, and pointer identity
1240 // implies object identity.
1241 if (A.getRawPtr () != A_.getRawPtr () || isInternallyChanged_) {
1242 // Check in serial or one-process mode if the matrix is square.
1243 TEUCHOS_TEST_FOR_EXCEPTION
1244 (! A.is_null () && A->getComm ()->getSize () == 1 &&
1245 A->getLocalNumRows () != A->getLocalNumCols (),
1246 std::runtime_error, prefix << "If A's communicator only contains one "
1247 "process, then A must be square. Instead, you provided a matrix A with "
1248 << A->getLocalNumRows () << " rows and " << A->getLocalNumCols ()
1249 << " columns.");
1250
1251 // It's legal for A to be null; in that case, you may not call
1252 // initialize() until calling setMatrix() with a nonnull input.
1253 // Regardless, setting the matrix invalidates the preconditioner.
1254 isInitialized_ = false;
1255 isComputed_ = false;
1256
1257 if (A.is_null ()) {
1258 A_crs_ = Teuchos::null;
1259 A_ = Teuchos::null;
1260 }
1261 else { // A is not null
1262 Teuchos::RCP<const crs_matrix_type> A_crs =
1263 Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
1264 TEUCHOS_TEST_FOR_EXCEPTION
1265 (A_crs.is_null (), std::invalid_argument, prefix <<
1266 "The input matrix A is not a Tpetra::CrsMatrix.");
1267 A_crs_ = A_crs;
1268 A_ = A;
1269 }
1270
1271 if (Teuchos::nonnull (htsImpl_))
1272 htsImpl_->reset ();
1273 } // pointers are not the same
1274}
1275
1276template<class MatrixType>
1277void
1279setStreamInfo (const bool& isKokkosKernelsStream, const int& num_streams,
1280 const std::vector<HandleExecSpace>& exec_space_instances)
1281{
1282 isKokkosKernelsStream_ = isKokkosKernelsStream;
1283 num_streams_ = num_streams;
1284 exec_space_instances_ = exec_space_instances;
1285 A_crs_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
1286}
1287
1288template<class MatrixType>
1290setMatrices (const std::vector< Teuchos::RCP<crs_matrix_type> >& A_crs_v)
1291{
1292 const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrixWithStreams: ";
1293
1294 for(int i = 0; i < num_streams_; i++) {
1295 // If the pointer didn't change, do nothing. This is reasonable
1296 // because users are supposed to call this method with the same
1297 // object over all participating processes, and pointer identity
1298 // implies object identity.
1299 if (A_crs_v[i].getRawPtr () != A_crs_v_[i].getRawPtr () || isInternallyChanged_) {
1300 // Check in serial or one-process mode if the matrix is square.
1301 TEUCHOS_TEST_FOR_EXCEPTION
1302 (! A_crs_v[i].is_null () && A_crs_v[i]->getComm ()->getSize () == 1 &&
1303 A_crs_v[i]->getLocalNumRows () != A_crs_v[i]->getLocalNumCols (),
1304 std::runtime_error, prefix << "If A's communicator only contains one "
1305 "process, then A must be square. Instead, you provided a matrix A with "
1306 << A_crs_v[i]->getLocalNumRows () << " rows and " << A_crs_v[i]->getLocalNumCols ()
1307 << " columns.");
1308
1309 // It's legal for A to be null; in that case, you may not call
1310 // initialize() until calling setMatrix() with a nonnull input.
1311 // Regardless, setting the matrix invalidates the preconditioner.
1312 isInitialized_ = false;
1313 isComputed_ = false;
1314
1315 if (A_crs_v[i].is_null ()) {
1316 A_crs_v_[i] = Teuchos::null;
1317 }
1318 else { // A is not null
1319 Teuchos::RCP<crs_matrix_type> A_crs =
1320 Teuchos::rcp_dynamic_cast<crs_matrix_type> (A_crs_v[i]);
1321 TEUCHOS_TEST_FOR_EXCEPTION
1322 (A_crs.is_null (), std::invalid_argument, prefix <<
1323 "The input matrix A is not a Tpetra::CrsMatrix.");
1324 A_crs_v_[i] = A_crs;
1325 }
1326 } // pointers are not the same
1327 }
1328}
1329
1330} // namespace Ifpack2
1331
1332#define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
1333 template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
1334
1335#endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
"Preconditioner" that solves local sparse triangular systems.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:55
bool isComputed() const
Return true if compute() has been called.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:189
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1170
std::string description() const
A one-line description of this object.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1135
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:69
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:300
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:60
void setParameters(const Teuchos::ParameterList &params)
Set this object's parameters.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:358
bool isInitialized() const
Return true if the preconditioner has been successfully initialized.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:178
double getComputeTime() const
Return the time spent in compute().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1121
void initialize()
"Symbolic" phase of setup
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:405
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:62
int getNumCompute() const
Return the number of calls to compute().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1100
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
This operator's communicator.
double getInitializeTime() const
Return the time spent in initialize().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1114
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1208
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:58
MatrixType::node_type node_type
Node type of the input matrix.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:64
int getNumInitialize() const
Return the number of calls to initialize().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1093
void setStreamInfo(const bool &isKokkosKernelsStream, const int &num_streams, const std::vector< HandleExecSpace > &exec_space_instances)
Set this triangular solver's stream information.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1279
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:75
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1221
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner's matrix.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1233
void setMatrices(const std::vector< Teuchos::RCP< crs_matrix_type > > &A_crs_v)
Set this preconditioner's matrices (used by stream interface of triangular solve).
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1290
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:339
double getApplyTime() const
Return the time spent in apply().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1128
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:756
int getNumApply() const
Return the number of calls to apply().
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:1107
void compute()
"Numeric" phase of setup
Definition Ifpack2_LocalSparseTriangularSolver_def.hpp:652
TRANS
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:41