Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_RILUK_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_CRSRILUK_DEF_HPP
11#define IFPACK2_CRSRILUK_DEF_HPP
12
14#include "Ifpack2_LocalFilter.hpp"
15#include "Tpetra_CrsMatrix.hpp"
16#include "Teuchos_StandardParameterEntryValidators.hpp"
17#include "Ifpack2_LocalSparseTriangularSolver.hpp"
18#include "Ifpack2_Details_getParamTryingTypes.hpp"
19#include "Ifpack2_Details_getCrsMatrix.hpp"
20#include "Kokkos_Sort.hpp"
21#include "KokkosSparse_Utils.hpp"
22#include "KokkosKernels_Sorting.hpp"
23#include "KokkosSparse_IOUtils.hpp"
24
25namespace Ifpack2 {
26
27namespace Details {
28struct IlukImplType {
29 enum Enum {
30 Serial,
31 KSPILUK
32 };
33
34 static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
35 type_strs.resize(2);
36 type_strs[0] = "Serial";
37 type_strs[1] = "KSPILUK";
38 type_enums.resize(2);
39 type_enums[0] = Serial;
40 type_enums[1] = KSPILUK;
41 }
42};
43}
44
45template<class MatrixType>
46RILUK<MatrixType>::RILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
47 : A_ (Matrix_in),
48 LevelOfFill_ (0),
49 Overalloc_ (2.),
50 isAllocated_ (false),
51 isInitialized_ (false),
52 isComputed_ (false),
53 numInitialize_ (0),
54 numCompute_ (0),
55 numApply_ (0),
56 initializeTime_ (0.0),
57 computeTime_ (0.0),
58 applyTime_ (0.0),
59 RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
60 Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
61 Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
63 isKokkosKernelsStream_(false),
64 num_streams_(0),
65 hasStreamReordered_(false)
66{
67 allocateSolvers();
68}
69
70
71template<class MatrixType>
72RILUK<MatrixType>::RILUK (const Teuchos::RCP<const crs_matrix_type>& Matrix_in)
73 : A_ (Matrix_in),
74 LevelOfFill_ (0),
75 Overalloc_ (2.),
76 isAllocated_ (false),
77 isInitialized_ (false),
78 isComputed_ (false),
79 numInitialize_ (0),
80 numCompute_ (0),
81 numApply_ (0),
82 initializeTime_ (0.0),
83 computeTime_ (0.0),
84 applyTime_ (0.0),
85 RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
86 Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
87 Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
89 isKokkosKernelsStream_(false),
90 num_streams_(0),
91 hasStreamReordered_(false)
92{
93 allocateSolvers();
94}
95
96
97template<class MatrixType>
99{
100 if (!isKokkosKernelsStream_) {
101 if (Teuchos::nonnull (KernelHandle_)) {
102 KernelHandle_->destroy_spiluk_handle();
103 }
104 }
105 else {
106 for (int i = 0; i < num_streams_; i++) {
107 if (Teuchos::nonnull (KernelHandle_v_[i])) {
108 KernelHandle_v_[i]->destroy_spiluk_handle();
109 }
110 }
111 }
112}
113
114template<class MatrixType>
115void RILUK<MatrixType>::allocateSolvers ()
116{
117 L_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
118 L_solver_->setObjectLabel("lower");
119 U_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
120 U_solver_->setObjectLabel("upper");
121}
122
123template<class MatrixType>
124void
125RILUK<MatrixType>::setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
126{
127 // It's legal for A to be null; in that case, you may not call
128 // initialize() until calling setMatrix() with a nonnull input.
129 // Regardless, setting the matrix invalidates any previous
130 // factorization.
131 if (A.getRawPtr () != A_.getRawPtr ()) {
132 isAllocated_ = false;
133 isInitialized_ = false;
134 isComputed_ = false;
135 A_local_ = Teuchos::null;
136 Graph_ = Teuchos::null;
137
138 // The sparse triangular solvers get a triangular factor as their
139 // input matrix. The triangular factors L_ and U_ are getting
140 // reset, so we reset the solvers' matrices to null. Do that
141 // before setting L_ and U_ to null, so that latter step actually
142 // frees the factors.
143 if (! L_solver_.is_null ()) {
144 L_solver_->setMatrix (Teuchos::null);
145 }
146 if (! U_solver_.is_null ()) {
147 U_solver_->setMatrix (Teuchos::null);
148 }
149
150 L_ = Teuchos::null;
151 U_ = Teuchos::null;
152 D_ = Teuchos::null;
153 A_ = A;
154 }
155}
156
157
158
159template<class MatrixType>
162{
163 TEUCHOS_TEST_FOR_EXCEPTION(
164 L_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
165 "is null. Please call initialize() (and preferably also compute()) "
166 "before calling this method. If the input matrix has not yet been set, "
167 "you must first call setMatrix() with a nonnull input matrix before you "
168 "may call initialize() or compute().");
169 return *L_;
170}
171
172
173template<class MatrixType>
174const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
179{
180 TEUCHOS_TEST_FOR_EXCEPTION(
181 D_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
182 "(of diagonal entries) is null. Please call initialize() (and "
183 "preferably also compute()) before calling this method. If the input "
184 "matrix has not yet been set, you must first call setMatrix() with a "
185 "nonnull input matrix before you may call initialize() or compute().");
186 return *D_;
187}
188
189
190template<class MatrixType>
193{
194 TEUCHOS_TEST_FOR_EXCEPTION(
195 U_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
196 "is null. Please call initialize() (and preferably also compute()) "
197 "before calling this method. If the input matrix has not yet been set, "
198 "you must first call setMatrix() with a nonnull input matrix before you "
199 "may call initialize() or compute().");
200 return *U_;
201}
202
203
204template<class MatrixType>
206 TEUCHOS_TEST_FOR_EXCEPTION(
207 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getNodeSmootherComplexity: "
208 "The input matrix A is null. Please call setMatrix() with a nonnull "
209 "input matrix, then call compute(), before calling this method.");
210 // RILUK methods cost roughly one apply + the nnz in the upper+lower triangles
211 if(!L_.is_null() && !U_.is_null())
212 return A_->getLocalNumEntries() + L_->getLocalNumEntries() + U_->getLocalNumEntries();
213 else
214 return 0;
215}
216
217
218template<class MatrixType>
219Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
223 TEUCHOS_TEST_FOR_EXCEPTION(
224 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
225 "The matrix is null. Please call setMatrix() with a nonnull input "
226 "before calling this method.");
227
228 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
229 TEUCHOS_TEST_FOR_EXCEPTION(
230 Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
231 "The computed graph is null. Please call initialize() before calling "
232 "this method.");
233 return Graph_->getL_Graph ()->getDomainMap ();
234}
235
236
237template<class MatrixType>
238Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
242 TEUCHOS_TEST_FOR_EXCEPTION(
243 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
244 "The matrix is null. Please call setMatrix() with a nonnull input "
245 "before calling this method.");
246
247 // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
248 TEUCHOS_TEST_FOR_EXCEPTION(
249 Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
250 "The computed graph is null. Please call initialize() before calling "
251 "this method.");
252 return Graph_->getL_Graph ()->getRangeMap ();
253}
254
255
256template<class MatrixType>
257void RILUK<MatrixType>::allocate_L_and_U ()
258{
259 using Teuchos::null;
260 using Teuchos::rcp;
261
262 if (! isAllocated_) {
263 if (!isKokkosKernelsStream_) {
264 // Deallocate any existing storage. This avoids storing 2x
265 // memory, since RCP op= does not deallocate until after the
266 // assignment.
267 L_ = null;
268 U_ = null;
269 D_ = null;
270
271 // Allocate Matrix using ILUK graphs
272 L_ = rcp (new crs_matrix_type (Graph_->getL_Graph ()));
273 U_ = rcp (new crs_matrix_type (Graph_->getU_Graph ()));
274 L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
275 U_->setAllToScalar (STS::zero ());
276
277 // FIXME (mfh 24 Jan 2014) This assumes domain == range Map for L and U.
278 L_->fillComplete ();
279 U_->fillComplete ();
280 D_ = rcp (new vec_type (Graph_->getL_Graph ()->getRowMap ()));
281 }
282 else {
283 L_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
284 U_v_ = std::vector< Teuchos::RCP<crs_matrix_type> >(num_streams_);
285 for (int i = 0; i < num_streams_; i++) {
286 L_v_[i] = null;
287 U_v_[i] = null;
288
289 L_v_[i] = rcp (new crs_matrix_type (Graph_v_[i]->getL_Graph ()));
290 U_v_[i] = rcp (new crs_matrix_type (Graph_v_[i]->getU_Graph ()));
291 L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices
292 U_v_[i]->setAllToScalar (STS::zero ());
293
294 L_v_[i]->fillComplete ();
295 U_v_[i]->fillComplete ();
296 }
297 }
298 }
299 isAllocated_ = true;
300}
301
302
303template<class MatrixType>
304void
306setParameters (const Teuchos::ParameterList& params)
307{
308 using Teuchos::RCP;
309 using Teuchos::ParameterList;
310 using Teuchos::Array;
311 using Details::getParamTryingTypes;
312 const char prefix[] = "Ifpack2::RILUK: ";
313
314 // Default values of the various parameters.
315 int fillLevel = 0;
316 magnitude_type absThresh = STM::zero ();
317 magnitude_type relThresh = STM::one ();
318 magnitude_type relaxValue = STM::zero ();
319 double overalloc = 2.;
320 int nstreams = 0;
321
322 // "fact: iluk level-of-fill" parsing is more complicated, because
323 // we want to allow as many types as make sense. int is the native
324 // type, but we also want to accept double (for backwards
325 // compatibilty with ILUT). You can't cast arbitrary magnitude_type
326 // (e.g., Sacado::MP::Vector) to int, so we use float instead, to
327 // get coverage of the most common magnitude_type cases. Weirdly,
328 // there's an Ifpack2 test that sets the fill level as a
329 // global_ordinal_type.
330 {
331 const std::string paramName ("fact: iluk level-of-fill");
332 getParamTryingTypes<int, int, global_ordinal_type, double, float>
333 (fillLevel, params, paramName, prefix);
334 }
335 // For the other parameters, we prefer magnitude_type, but allow
336 // double for backwards compatibility.
337 {
338 const std::string paramName ("fact: absolute threshold");
339 getParamTryingTypes<magnitude_type, magnitude_type, double>
340 (absThresh, params, paramName, prefix);
341 }
342 {
343 const std::string paramName ("fact: relative threshold");
344 getParamTryingTypes<magnitude_type, magnitude_type, double>
345 (relThresh, params, paramName, prefix);
346 }
347 {
348 const std::string paramName ("fact: relax value");
349 getParamTryingTypes<magnitude_type, magnitude_type, double>
350 (relaxValue, params, paramName, prefix);
351 }
352 {
353 const std::string paramName ("fact: iluk overalloc");
354 getParamTryingTypes<double, double>
355 (overalloc, params, paramName, prefix);
356 }
357
358 // Parsing implementation type
359 Details::IlukImplType::Enum ilukimplType = Details::IlukImplType::Serial;
360 do {
361 static const char typeName[] = "fact: type";
362
363 if ( ! params.isType<std::string>(typeName)) break;
364
365 // Map std::string <-> IlukImplType::Enum.
366 Array<std::string> ilukimplTypeStrs;
367 Array<Details::IlukImplType::Enum> ilukimplTypeEnums;
368 Details::IlukImplType::loadPLTypeOption (ilukimplTypeStrs, ilukimplTypeEnums);
369 Teuchos::StringToIntegralParameterEntryValidator<Details::IlukImplType::Enum>
370 s2i(ilukimplTypeStrs (), ilukimplTypeEnums (), typeName, false);
371
372 ilukimplType = s2i.getIntegralValue(params.get<std::string>(typeName));
373 } while (0);
374
375 if (ilukimplType == Details::IlukImplType::KSPILUK) {
376 this->isKokkosKernelsSpiluk_ = true;
377 }
378 else {
379 this->isKokkosKernelsSpiluk_ = false;
380 }
381
382 {
383 const std::string paramName ("fact: kspiluk number-of-streams");
384 getParamTryingTypes<int, int, global_ordinal_type>
385 (nstreams, params, paramName, prefix);
386 }
387
388 // Forward to trisolvers.
389 L_solver_->setParameters(params);
390 U_solver_->setParameters(params);
391
392 // "Commit" the values only after validating all of them. This
393 // ensures that there are no side effects if this routine throws an
394 // exception.
395
396 LevelOfFill_ = fillLevel;
397 Overalloc_ = overalloc;
398#ifdef KOKKOS_ENABLE_OPENMP
399 if constexpr (std::is_same_v<execution_space, Kokkos::OpenMP>) {
400 nstreams = std::min(nstreams, execution_space{}.concurrency());
401 }
402#endif
403 num_streams_ = nstreams;
404
405 if (num_streams_ >= 1) {
406 this->isKokkosKernelsStream_ = true;
407 // Will we do reordering in streams?
408 if (params.isParameter("fact: kspiluk reordering in streams"))
409 hasStreamReordered_ = params.get<bool> ("fact: kspiluk reordering in streams");
411 else {
412 this->isKokkosKernelsStream_ = false;
413 }
414
415 // mfh 28 Nov 2012: The previous code would not assign Athresh_,
416 // Rthresh_, or RelaxValue_, if the read-in value was -1. I don't
417 // know if keeping this behavior is correct, but I'll keep it just
418 // so as not to change previous behavior.
419
420 if (absThresh != -STM::one ()) {
421 Athresh_ = absThresh;
423 if (relThresh != -STM::one ()) {
424 Rthresh_ = relThresh;
425 }
426 if (relaxValue != -STM::one ()) {
427 RelaxValue_ = relaxValue;
428 }
429}
430
431
432template<class MatrixType>
433Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
435 return Teuchos::rcp_implicit_cast<const row_matrix_type> (A_);
436}
437
438
439template<class MatrixType>
440Teuchos::RCP<const typename RILUK<MatrixType>::crs_matrix_type>
442 return Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_, true);
443}
444
445
446template<class MatrixType>
447Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
448RILUK<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A)
449{
450 using Teuchos::RCP;
451 using Teuchos::rcp;
452 using Teuchos::rcp_dynamic_cast;
453 using Teuchos::rcp_implicit_cast;
455 // If A_'s communicator only has one process, or if its column and
456 // row Maps are the same, then it is already local, so use it
457 // directly.
458 if (A->getRowMap ()->getComm ()->getSize () == 1 ||
459 A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
460 return A;
461 }
462
463 // If A_ is already a LocalFilter, then use it directly. This
464 // should be the case if RILUK is being used through
465 // AdditiveSchwarz, for example.
466 RCP<const LocalFilter<row_matrix_type> > A_lf_r =
467 rcp_dynamic_cast<const LocalFilter<row_matrix_type> > (A);
468 if (! A_lf_r.is_null ()) {
469 return rcp_implicit_cast<const row_matrix_type> (A_lf_r);
470 }
471 else {
472 // A_'s communicator has more than one process, its row Map and
473 // its column Map differ, and A_ is not a LocalFilter. Thus, we
474 // have to wrap it in a LocalFilter.
475 return rcp (new LocalFilter<row_matrix_type> (A));
476 }
477}
478
479
480template<class MatrixType>
482{
483 using Teuchos::RCP;
484 using Teuchos::rcp;
485 using Teuchos::rcp_const_cast;
486 using Teuchos::rcp_dynamic_cast;
487 using Teuchos::rcp_implicit_cast;
488 using Teuchos::Array;
489 using Teuchos::ArrayView;
490 typedef Tpetra::CrsGraph<local_ordinal_type,
492 node_type> crs_graph_type;
493 typedef Tpetra::Map<local_ordinal_type,
495 node_type> crs_map_type;
496 const char prefix[] = "Ifpack2::RILUK::initialize: ";
497
498 TEUCHOS_TEST_FOR_EXCEPTION
499 (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
500 "call setMatrix() with a nonnull input before calling this method.");
501 TEUCHOS_TEST_FOR_EXCEPTION
502 (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
503 "fill complete. You may not invoke initialize() or compute() with this "
504 "matrix until the matrix is fill complete. If your matrix is a "
505 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
506 "range Maps, if appropriate) before calling this method.");
507
508 Teuchos::Time timer ("RILUK::initialize");
509 double startTime = timer.wallTime();
510 { // Start timing
511 Teuchos::TimeMonitor timeMon (timer);
512
513 // Calling initialize() means that the user asserts that the graph
514 // of the sparse matrix may have changed. We must not just reuse
515 // the previous graph in that case.
516 //
517 // Regarding setting isAllocated_ to false: Eventually, we may want
518 // some kind of clever memory reuse strategy, but it's always
519 // correct just to blow everything away and start over.
520 isInitialized_ = false;
521 isAllocated_ = false;
522 isComputed_ = false;
523 Graph_ = Teuchos::null;
524 Y_tmp_ = nullptr;
525 reordered_x_ = nullptr;
526 reordered_y_ = nullptr;
528 if (isKokkosKernelsStream_) {
529 Graph_v_ = std::vector< Teuchos::RCP<iluk_graph_type> >(num_streams_);
530 A_local_diagblks = std::vector<local_matrix_device_type>(num_streams_);
531 for (int i = 0; i < num_streams_; i++) {
532 Graph_v_[i] = Teuchos::null;
533 }
535
536 A_local_ = makeLocalFilter (A_);
538 TEUCHOS_TEST_FOR_EXCEPTION(
539 A_local_.is_null (), std::logic_error, "Ifpack2::RILUK::initialize: "
540 "makeLocalFilter returned null; it failed to compute A_local. "
541 "Please report this bug to the Ifpack2 developers.");
542
543 // FIXME (mfh 24 Jan 2014, 26 Mar 2014) It would be more efficient
544 // to rewrite RILUK so that it works with any RowMatrix input, not
545 // just CrsMatrix. (That would require rewriting IlukGraph to
546 // handle a Tpetra::RowGraph.) However, to make it work for now,
547 // we just copy the input matrix if it's not a CrsMatrix.
548
549 {
550 A_local_crs_ = Details::getCrsMatrix(A_local_);
551 if(A_local_crs_.is_null()) {
552 local_ordinal_type numRows = A_local_->getLocalNumRows();
553 Array<size_t> entriesPerRow(numRows);
554 for(local_ordinal_type i = 0; i < numRows; i++) {
555 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
556 }
557 A_local_crs_nc_ =
558 rcp (new crs_matrix_type (A_local_->getRowMap (),
559 A_local_->getColMap (),
560 entriesPerRow()));
561 // copy entries into A_local_crs
562 nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries());
563 nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries());
564 for(local_ordinal_type i = 0; i < numRows; i++) {
565 size_t numEntries = 0;
566 A_local_->getLocalRowCopy(i, indices, values, numEntries);
567 A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()), indices.data());
568 }
569 A_local_crs_nc_->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
570 A_local_crs_ = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc_);
571 }
572 if (!isKokkosKernelsStream_) {
573 Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (A_local_crs_->getCrsGraph (),
574 LevelOfFill_, 0, Overalloc_));
575 }
576 else {
577 std::vector<int> weights(num_streams_);
578 std::fill(weights.begin(), weights.end(), 1);
579 exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights);
580
581 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
582 if (!hasStreamReordered_) {
583 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks);
584 } else {
585 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks, true);
586 reverse_perm_v_.resize(perm_v_.size());
587 for(size_t istream=0; istream < perm_v_.size(); ++istream) {
588 using perm_type = typename lno_nonzero_view_t::non_const_type;
589 const auto perm = perm_v_[istream];
590 const auto perm_length = perm.extent(0);
591 perm_type reverse_perm(
592 Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverse_perm"),
593 perm_length);
594 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
595 KOKKOS_LAMBDA(const local_ordinal_type ii) {
596 reverse_perm(perm(ii)) = ii;
597 });
598 reverse_perm_v_[istream] = reverse_perm;
599 }
600 }
601
602 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
603 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
604 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
605
606 for(int i = 0; i < num_streams_; i++) {
607 A_local_diagblks_rowmap_v_[i] = A_local_diagblks[i].graph.row_map;
608 A_local_diagblks_entries_v_[i] = A_local_diagblks[i].graph.entries;
609 A_local_diagblks_values_v_[i] = A_local_diagblks[i].values;
610
611 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp (new crs_map_type(A_local_diagblks[i].numRows(),
612 A_local_diagblks[i].numRows(),
613 A_local_crs_->getRowMap()->getComm()));
614 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp (new crs_map_type(A_local_diagblks[i].numCols(),
615 A_local_diagblks[i].numCols(),
616 A_local_crs_->getColMap()->getComm()));
617 Teuchos::RCP<crs_matrix_type> A_local_diagblks_ = rcp (new crs_matrix_type(A_local_diagblks_RowMap,
618 A_local_diagblks_ColMap,
619 A_local_diagblks[i]));
620 Graph_v_[i] = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (A_local_diagblks_->getCrsGraph(),
621 LevelOfFill_, 0, Overalloc_));
622 }
623 }
624 }
625
626 if (this->isKokkosKernelsSpiluk_) {
627 if (!isKokkosKernelsStream_) {
628 this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
629 KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
630 A_local_->getLocalNumRows(),
631 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
632 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
633 Graph_->initialize (KernelHandle_); // this calls spiluk_symbolic
634 }
635 else {
636 KernelHandle_v_ = std::vector< Teuchos::RCP<kk_handle_type> >(num_streams_);
637 for (int i = 0; i < num_streams_; i++) {
638 KernelHandle_v_[i] = Teuchos::rcp (new kk_handle_type ());
639 KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
640 A_local_diagblks[i].numRows(),
641 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
642 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1) );
643 Graph_v_[i]->initialize (KernelHandle_v_[i]); // this calls spiluk_symbolic
644 }
645 }
646 }
647 else {
648 Graph_->initialize ();
649 }
650
651 allocate_L_and_U ();
652 checkOrderingConsistency (*A_local_);
653 if (!isKokkosKernelsStream_) {
654 L_solver_->setMatrix (L_);
655 }
656 else {
657 L_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
658 L_solver_->setMatrices (L_v_);
659 }
660 L_solver_->initialize ();
661
662 if (!isKokkosKernelsStream_) {
663 U_solver_->setMatrix (U_);
664 }
665 else {
666 U_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
667 U_solver_->setMatrices (U_v_);
668 }
669 U_solver_->initialize ();
670
671 // Do not call initAllValues. compute() always calls initAllValues to
672 // fill L and U with possibly new numbers. initialize() is concerned
673 // only with the nonzero pattern.
674 } // Stop timing
675
676 isInitialized_ = true;
677 ++numInitialize_;
678 initializeTime_ += (timer.wallTime() - startTime);
679}
680
681template<class MatrixType>
682void
685{
686 // First check that the local row map ordering is the same as the local portion of the column map.
687 // The extraction of the strictly lower/upper parts of A, as well as the factorization,
688 // implicitly assume that this is the case.
689 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
690 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
691 bool gidsAreConsistentlyOrdered=true;
692 global_ordinal_type indexOfInconsistentGID=0;
693 for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
694 if (rowGIDs[i] != colGIDs[i]) {
695 gidsAreConsistentlyOrdered=false;
696 indexOfInconsistentGID=i;
697 break;
698 }
699 }
700 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
701 "The ordering of the local GIDs in the row and column maps is not the same"
702 << std::endl << "at index " << indexOfInconsistentGID
703 << ". Consistency is required, as all calculations are done with"
704 << std::endl << "local indexing.");
705}
706
707template<class MatrixType>
708void
711{
712 using Teuchos::ArrayRCP;
713 using Teuchos::Comm;
714 using Teuchos::ptr;
715 using Teuchos::RCP;
716 using Teuchos::REDUCE_SUM;
717 using Teuchos::reduceAll;
718 typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
719
720 size_t NumIn = 0, NumL = 0, NumU = 0;
721 bool DiagFound = false;
722 size_t NumNonzeroDiags = 0;
723 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
724
725 // Allocate temporary space for extracting the strictly
726 // lower and upper parts of the matrix A.
727 nonconst_local_inds_host_view_type InI("InI",MaxNumEntries);
728 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
729 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
730 nonconst_values_host_view_type InV("InV",MaxNumEntries);
731 Teuchos::Array<scalar_type> LV(MaxNumEntries);
732 Teuchos::Array<scalar_type> UV(MaxNumEntries);
733
734 // Check if values should be inserted or replaced
735 const bool ReplaceValues = L_->isStaticGraph () || L_->isLocallyIndexed ();
736
737 L_->resumeFill ();
738 U_->resumeFill ();
739 if (ReplaceValues) {
740 L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
741 U_->setAllToScalar (STS::zero ());
742 }
743
744 D_->putScalar (STS::zero ()); // Set diagonal values to zero
745 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
746
747 RCP<const map_type> rowMap = L_->getRowMap ();
748
749 // First we copy the user's matrix into L and U, regardless of fill level.
750 // It is important to note that L and U are populated using local indices.
751 // This means that if the row map GIDs are not monotonically increasing
752 // (i.e., permuted or gappy), then the strictly lower (upper) part of the
753 // matrix is not the one that you would get if you based L (U) on GIDs.
754 // This is ok, as the *order* of the GIDs in the rowmap is a better
755 // expression of the user's intent than the GIDs themselves.
756
757 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
758 for (size_t myRow=0; myRow<A.getLocalNumRows(); ++myRow) {
759 local_ordinal_type local_row = myRow;
760
761 //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
762 // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
763 A.getLocalRowCopy (local_row, InI, InV, NumIn); // Get Values and Indices
764
765 // Split into L and U (we don't assume that indices are ordered).
766
767 NumL = 0;
768 NumU = 0;
769 DiagFound = false;
770
771 for (size_t j = 0; j < NumIn; ++j) {
772 const local_ordinal_type k = InI[j];
773
774 if (k == local_row) {
775 DiagFound = true;
776 // Store perturbed diagonal in Tpetra::Vector D_
777 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
778 }
779 else if (k < 0) { // Out of range
780 TEUCHOS_TEST_FOR_EXCEPTION(
781 true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
782 "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
783 "probably an artifact of the undocumented assumptions of the "
784 "original implementation (likely copied and pasted from Ifpack). "
785 "Nevertheless, the code I found here insisted on this being an error "
786 "state, so I will throw an exception here.");
787 }
788 else if (k < local_row) {
789 LI[NumL] = k;
790 LV[NumL] = InV[j];
791 NumL++;
792 }
793 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
794 UI[NumU] = k;
795 UV[NumU] = InV[j];
796 NumU++;
797 }
798 }
799
800 // Check in things for this row of L and U
801
802 if (DiagFound) {
803 ++NumNonzeroDiags;
804 } else {
805 DV(local_row) = Athresh_;
806 }
807
808 if (NumL) {
809 if (ReplaceValues) {
810 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0,NumL));
811 } else {
812 //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
813 //FIXME in this row in the column locations corresponding to UI.
814 L_->insertLocalValues(local_row, LI(0,NumL), LV(0,NumL));
815 }
816 }
817
818 if (NumU) {
819 if (ReplaceValues) {
820 U_->replaceLocalValues(local_row, UI(0,NumU), UV(0,NumU));
821 } else {
822 //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
823 //FIXME in this row in the column locations corresponding to UI.
824 U_->insertLocalValues(local_row, UI(0,NumU), UV(0,NumU));
825 }
826 }
827 }
828
829 // At this point L and U have the values of A in the structure of L
830 // and U, and diagonal vector D
831
832 isInitialized_ = true;
833}
834
835template<class MatrixType>
836void RILUK<MatrixType>::compute_serial ()
837{
838 // Fill L and U with numbers. This supports nonzero pattern reuse by calling
839 // initialize() once and then compute() multiple times.
840 initAllValues (*A_local_);
841
842 // MinMachNum should be officially defined, for now pick something a little
843 // bigger than IEEE underflow value
844
845 const scalar_type MinDiagonalValue = STS::rmin ();
846 const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
847
848 size_t NumIn, NumL, NumU;
849
850 // Get Maximum Row length
851 const size_t MaxNumEntries =
852 L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1;
853
854 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries); // Allocate temp space
855 Teuchos::Array<scalar_type> InV(MaxNumEntries);
856 size_t num_cols = U_->getColMap()->getLocalNumElements();
857 Teuchos::Array<int> colflag(num_cols, -1);
858
859 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
860
861 // Now start the factorization.
862
863 using IST = typename row_matrix_type::impl_scalar_type;
864 for (size_t i = 0; i < L_->getLocalNumRows (); ++i) {
865 local_ordinal_type local_row = i;
866 // Need some integer workspace and pointers
867 size_t NumUU;
868 local_inds_host_view_type UUI;
869 values_host_view_type UUV;
870
871 // Fill InV, InI with current row of L, D and U combined
872
873 NumIn = MaxNumEntries;
874 nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries);
875 nonconst_values_host_view_type InV_v(reinterpret_cast<IST*>(InV.data()),MaxNumEntries);
876
877 L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL);
878
879 InV[NumL] = DV(i); // Put in diagonal
880 InI[NumL] = local_row;
881
882 nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
883 nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data())+NumL+1,MaxNumEntries-NumL-1);
884
885 U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
886 NumIn = NumL+NumU+1;
887
888 // Set column flags
889 for (size_t j = 0; j < NumIn; ++j) {
890 colflag[InI[j]] = j;
891 }
892
893 scalar_type diagmod = STS::zero (); // Off-diagonal accumulator
894
895 for (size_t jj = 0; jj < NumL; ++jj) {
896 local_ordinal_type j = InI[jj];
897 IST multiplier = InV[jj]; // current_mults++;
898
899 InV[jj] *= static_cast<scalar_type>(DV(j));
900
901 U_->getLocalRowView(j, UUI, UUV); // View of row above
902 NumUU = UUI.size();
903
904 if (RelaxValue_ == STM::zero ()) {
905 for (size_t k = 0; k < NumUU; ++k) {
906 const int kk = colflag[UUI[k]];
907 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
908 // colflag above using size_t (which is generally unsigned),
909 // but now we're querying it using int (which is signed).
910 if (kk > -1) {
911 InV[kk] -= static_cast<scalar_type>(multiplier * UUV[k]);
912 }
913 }
914
915 }
916 else {
917 for (size_t k = 0; k < NumUU; ++k) {
918 // FIXME (mfh 23 Dec 2013) Wait a second, we just set
919 // colflag above using size_t (which is generally unsigned),
920 // but now we're querying it using int (which is signed).
921 const int kk = colflag[UUI[k]];
922 if (kk > -1) {
923 InV[kk] -= static_cast<scalar_type>(multiplier*UUV[k]);
924 }
925 else {
926 diagmod -= static_cast<scalar_type>(multiplier*UUV[k]);
927 }
928 }
929 }
930 }
931
932 if (NumL) {
933 // Replace current row of L
934 L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
935 }
936
937 DV(i) = InV[NumL]; // Extract Diagonal value
938
939 if (RelaxValue_ != STM::zero ()) {
940 DV(i) += RelaxValue_*diagmod; // Add off diagonal modifications
941 }
942
943 if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) {
944 if (STS::real (DV(i)) < STM::zero ()) {
945 DV(i) = -MinDiagonalValue;
946 }
947 else {
948 DV(i) = MinDiagonalValue;
949 }
950 }
951 else {
952 DV(i) = static_cast<impl_scalar_type>(STS::one ()) / DV(i); // Invert diagonal value
953 }
954
955 for (size_t j = 0; j < NumU; ++j) {
956 InV[NumL+1+j] *= static_cast<scalar_type>(DV(i)); // Scale U by inverse of diagonal
957 }
958
959 if (NumU) {
960 // Replace current row of L and U
961 U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
962 }
963
964 // Reset column flags
965 for (size_t j = 0; j < NumIn; ++j) {
966 colflag[InI[j]] = -1;
967 }
968 }
969
970 // The domain of L and the range of U are exactly their own row maps
971 // (there is no communication). The domain of U and the range of L
972 // must be the same as those of the original matrix, However if the
973 // original matrix is a VbrMatrix, these two latter maps are
974 // translation from a block map to a point map.
975 // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is
976 // always one-to-one?
977 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
978 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
979
980 // If L_solver_ or U_solver store modified factors internally, we need to reset those
981 L_solver_->setMatrix (L_);
982 L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
983 U_solver_->setMatrix (U_);
984 U_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
985
986}
987
988template<class MatrixType>
989void RILUK<MatrixType>::compute_kkspiluk()
990{
991 L_->resumeFill ();
992 U_->resumeFill ();
993
994 L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
995 U_->setAllToScalar (STS::zero ());
996
997 using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type;
998 auto lclL = L_->getLocalMatrixDevice();
999 row_map_type L_rowmap = lclL.graph.row_map;
1000 auto L_entries = lclL.graph.entries;
1001 auto L_values = lclL.values;
1002
1003 auto lclU = U_->getLocalMatrixDevice();
1004 row_map_type U_rowmap = lclU.graph.row_map;
1005 auto U_entries = lclU.graph.entries;
1006 auto U_values = lclU.values;
1007
1008 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1009 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
1010 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
1011 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
1012
1013 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
1014 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
1015
1016 L_solver_->compute ();
1017 U_solver_->compute ();
1018}
1019
1020template<class MatrixType>
1021void RILUK<MatrixType>::compute_kkspiluk_stream()
1022{
1023 for(int i = 0; i < num_streams_; i++) {
1024 L_v_[i]->resumeFill ();
1025 U_v_[i]->resumeFill ();
1026
1027 L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices
1028 U_v_[i]->setAllToScalar (STS::zero ());
1029 }
1030 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1031 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1032 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1033 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1034 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1035 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1036 std::vector<kk_handle_type *> KernelHandle_rawptr_v_(num_streams_);
1037 for(int i = 0; i < num_streams_; i++) {
1038 auto lclL = L_v_[i]->getLocalMatrixDevice();
1039 L_rowmap_v[i] = lclL.graph.row_map;
1040 L_entries_v[i] = lclL.graph.entries;
1041 L_values_v[i] = lclL.values;
1042
1043 auto lclU = U_v_[i]->getLocalMatrixDevice();
1044 U_rowmap_v[i] = lclU.graph.row_map;
1045 U_entries_v[i] = lclU.graph.entries;
1046 U_values_v[i] = lclU.values;
1047 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1048 }
1049
1050 {
1051 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1052 // A_local_diagblks was already setup during initialize, just copy the corresponding
1053 // values from A_local_crs_ in parallel now.
1054 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1055 const auto A_nrows = lclMtx.numRows();
1056 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1057 ? (A_nrows / num_streams_)
1058 : (A_nrows / num_streams_ + 1);
1059 for(int i = 0; i < num_streams_; i++) {
1060 const auto start_row_offset = i * rows_per_block;
1061 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1062 auto colindices = A_local_diagblks_entries_v_[i];
1063 auto values = A_local_diagblks_values_v_[i];
1064 const bool reordered = hasStreamReordered_;
1065 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] : typename lno_nonzero_view_t::non_const_type{};
1066 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1067 Kokkos::parallel_for(pol, KOKKOS_LAMBDA (const typename TeamPolicy::member_type &team) {
1068 const auto irow = team.league_rank();
1069 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1070 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1071 const auto begin_row = rowptrs(irow);
1072 const auto num_entries = rowptrs(irow + 1) - begin_row;
1073 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](const int j) {
1074 const auto colidx = colindices(begin_row + j);
1075 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1076 // Find colidx in A_local_crs_row
1077 const int offset = KokkosSparse::findRelOffset(
1078 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0, false);
1079 values(begin_row + j) = A_local_crs_row.value(offset);
1080 });
1081 });
1082 }
1083 }
1084
1085 KokkosSparse::Experimental::spiluk_numeric_streams( exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1086 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1087 L_rowmap_v, L_entries_v, L_values_v,
1088 U_rowmap_v, U_entries_v, U_values_v );
1089 for(int i = 0; i < num_streams_; i++) {
1090 L_v_[i]->fillComplete ();
1091 U_v_[i]->fillComplete ();
1092 }
1093
1094 L_solver_->compute ();
1095 U_solver_->compute ();
1096}
1097
1098template<class MatrixType>
1100{
1101 using Teuchos::RCP;
1102 using Teuchos::rcp;
1103 using Teuchos::rcp_const_cast;
1104 using Teuchos::rcp_dynamic_cast;
1105 using Teuchos::Array;
1106 using Teuchos::ArrayView;
1107 const char prefix[] = "Ifpack2::RILUK::compute: ";
1108
1109 // initialize() checks this too, but it's easier for users if the
1110 // error shows them the name of the method that they actually
1111 // called, rather than the name of some internally called method.
1112 TEUCHOS_TEST_FOR_EXCEPTION
1113 (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
1114 "call setMatrix() with a nonnull input before calling this method.");
1115 TEUCHOS_TEST_FOR_EXCEPTION
1116 (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
1117 "fill complete. You may not invoke initialize() or compute() with this "
1118 "matrix until the matrix is fill complete. If your matrix is a "
1119 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1120 "range Maps, if appropriate) before calling this method.");
1121
1122 if (! isInitialized ()) {
1123 initialize (); // Don't count this in the compute() time
1124 }
1125
1126 Teuchos::Time timer ("RILUK::compute");
1127
1128 // Start timing
1129 Teuchos::TimeMonitor timeMon (timer);
1130 double startTime = timer.wallTime();
1131
1132 isComputed_ = false;
1133
1134 if (!this->isKokkosKernelsSpiluk_) {
1135 compute_serial();
1136 }
1137 else {
1138 //Make sure values in A is picked up even in case of pattern reuse
1139 if(!A_local_crs_nc_.is_null()) {
1140 A_local_crs_nc_->resumeFill();
1141 local_ordinal_type numRows = A_local_->getLocalNumRows();
1142 Array<size_t> entriesPerRow(numRows);
1143 for(local_ordinal_type i = 0; i < numRows; i++) {
1144 entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i);
1145 }
1146 // copy entries into A_local_crs
1147 nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries());
1148 nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries());
1149 for(local_ordinal_type i = 0; i < numRows; i++) {
1150 size_t numEntries = 0;
1151 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1152 A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
1153 }
1154 A_local_crs_nc_->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
1155 }
1156
1157 if (!isKokkosKernelsStream_) {
1158 compute_kkspiluk();
1159 }
1160 else {
1161 compute_kkspiluk_stream();
1162 }
1163 }
1164
1165 isComputed_ = true;
1166 ++numCompute_;
1167 computeTime_ += (timer.wallTime() - startTime);
1168}
1169
1170namespace Impl {
1171template <typename MV, typename Map>
1172void resetMultiVecIfNeeded(std::unique_ptr<MV> &mv_ptr, const Map &map, const size_t numVectors, bool initialize)
1173{
1174 if(!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1175 mv_ptr.reset(new MV(map, numVectors, initialize));
1176 }
1177}
1178}
1179
1180template<class MatrixType>
1181void
1183apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1184 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1185 Teuchos::ETransp mode,
1186 scalar_type alpha,
1187 scalar_type beta) const
1188{
1189 using Teuchos::RCP;
1190 using Teuchos::rcpFromRef;
1191
1192 TEUCHOS_TEST_FOR_EXCEPTION(
1193 A_.is_null (), std::runtime_error, "Ifpack2::RILUK::apply: The matrix is "
1194 "null. Please call setMatrix() with a nonnull input, then initialize() "
1195 "and compute(), before calling this method.");
1196 TEUCHOS_TEST_FOR_EXCEPTION(
1197 ! isComputed (), std::runtime_error,
1198 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1199 "you must call compute() before calling this method.");
1200 TEUCHOS_TEST_FOR_EXCEPTION(
1201 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1202 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1203 "X.getNumVectors() = " << X.getNumVectors ()
1204 << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
1205 TEUCHOS_TEST_FOR_EXCEPTION(
1206 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1207 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1208 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1209 "fixed. There is a FIXME in this file about this very issue.");
1210#ifdef HAVE_IFPACK2_DEBUG
1211 {
1212 if (!isKokkosKernelsStream_) {
1213 const magnitude_type D_nrm1 = D_->norm1 ();
1214 TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1215 }
1216 Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
1217 X.norm1 (norms ());
1218 bool good = true;
1219 for (size_t j = 0; j < X.getNumVectors (); ++j) {
1220 if (STM::isnaninf (norms[j])) {
1221 good = false;
1222 break;
1223 }
1224 }
1225 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1226 }
1227#endif // HAVE_IFPACK2_DEBUG
1228
1229 const scalar_type one = STS::one ();
1230 const scalar_type zero = STS::zero ();
1231
1232 Teuchos::Time timer ("RILUK::apply");
1233 double startTime = timer.wallTime();
1234 { // Start timing
1235 Teuchos::TimeMonitor timeMon (timer);
1236 if (alpha == one && beta == zero) {
1237 if (isKokkosKernelsSpiluk_ && isKokkosKernelsStream_ && hasStreamReordered_) {
1238 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(), false);
1239 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(), false);
1240 Kokkos::fence();
1241 for (size_t j = 0; j < X.getNumVectors(); j++) {
1242 auto X_j = X.getVector(j);
1243 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1244 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1245 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1246 local_ordinal_type stream_begin = 0;
1247 local_ordinal_type stream_end;
1248 for(int i = 0; i < num_streams_; i++) {
1249 auto perm_i = perm_v_[i];
1250 stream_end = stream_begin + perm_i.extent(0);
1251 auto X_lcl_sub = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1252 auto ReorderedX_lcl_sub = Kokkos::subview (ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1253 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA ( const int& ii ) {
1254 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1255 });
1256 stream_begin = stream_end;
1257 }
1258 }
1259 Kokkos::fence();
1260 if (mode == Teuchos::NO_TRANS) { // Solve L (U Y) = X for Y.
1261 // Solve L Y = X for Y.
1262 L_solver_->apply (*reordered_x_, Y, mode);
1263 // Solve U Y = Y for Y.
1264 U_solver_->apply (Y, *reordered_y_, mode);
1265 }
1266 else { // Solve U^P (L^P Y) = X for Y (where P is * or T).
1267 // Solve U^P Y = X for Y.
1268 U_solver_->apply (*reordered_x_, Y, mode);
1269 // Solve L^P Y = Y for Y.
1270 L_solver_->apply (Y, *reordered_y_, mode);
1271 }
1272
1273 for (size_t j = 0; j < Y.getNumVectors(); j++) {
1274 auto Y_j = Y.getVectorNonConst(j);
1275 auto ReorderedY_j = reordered_y_->getVector(j);
1276 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1277 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1278 local_ordinal_type stream_begin = 0;
1279 local_ordinal_type stream_end;
1280 for(int i = 0; i < num_streams_; i++) {
1281 auto perm_i = perm_v_[i];
1282 stream_end = stream_begin + perm_i.extent(0);
1283 auto Y_lcl_sub = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1284 auto ReorderedY_lcl_sub = Kokkos::subview (ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1285 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0, static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA ( const int& ii ) {
1286 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1287 });
1288 stream_begin = stream_end;
1289 }
1290 }
1291 Kokkos::fence();
1292 }
1293 else {
1294 if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1295#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1296 //NOTE (Nov-15-2022):
1297 //This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1298 //since cusparseSpSV_solve() does not support in-place computation
1299 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1300
1301 // Start by solving L Y_tmp = X for Y_tmp.
1302 L_solver_->apply (X, *Y_tmp_, mode);
1303
1304 if (!this->isKokkosKernelsSpiluk_) {
1305 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1306 // write "solve D Y = Y for Y."
1307 Y_tmp_->elementWiseMultiply (one, *D_, *Y_tmp_, zero);
1308 }
1309
1310 U_solver_->apply (*Y_tmp_, Y, mode); // Solve U Y = Y_tmp.
1311#else
1312 // Start by solving L Y = X for Y.
1313 L_solver_->apply (X, Y, mode);
1314
1315 if (!this->isKokkosKernelsSpiluk_) {
1316 // Solve D Y = Y. The operation lets us do this in place in Y, so we can
1317 // write "solve D Y = Y for Y."
1318 Y.elementWiseMultiply (one, *D_, Y, zero);
1319 }
1320
1321 U_solver_->apply (Y, Y, mode); // Solve U Y = Y.
1322#endif
1323 }
1324 else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1325#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1326 //NOTE (Nov-15-2022):
1327 //This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
1328 //since cusparseSpSV_solve() does not support in-place computation
1329 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1330
1331 // Start by solving U^P Y_tmp = X for Y_tmp.
1332 U_solver_->apply (X, *Y_tmp_, mode);
1333
1334 if (!this->isKokkosKernelsSpiluk_) {
1335 // Solve D^P Y = Y.
1336 //
1337 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1338 // need to do an elementwise multiply with the conjugate of
1339 // D_, not just with D_ itself.
1340 Y_tmp_->elementWiseMultiply (one, *D_, *Y_tmp_, zero);
1341 }
1342
1343 L_solver_->apply (*Y_tmp_, Y, mode); // Solve L^P Y = Y_tmp.
1344#else
1345 // Start by solving U^P Y = X for Y.
1346 U_solver_->apply (X, Y, mode);
1347
1348 if (!this->isKokkosKernelsSpiluk_) {
1349 // Solve D^P Y = Y.
1350 //
1351 // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1352 // need to do an elementwise multiply with the conjugate of
1353 // D_, not just with D_ itself.
1354 Y.elementWiseMultiply (one, *D_, Y, zero);
1355 }
1356
1357 L_solver_->apply (Y, Y, mode); // Solve L^P Y = Y.
1358#endif
1359 }
1360 }
1361 }
1362 else { // alpha != 1 or beta != 0
1363 if (alpha == zero) {
1364 // The special case for beta == 0 ensures that if Y contains Inf
1365 // or NaN values, we replace them with 0 (following BLAS
1366 // convention), rather than multiplying them by 0 to get NaN.
1367 if (beta == zero) {
1368 Y.putScalar (zero);
1369 } else {
1370 Y.scale (beta);
1371 }
1372 } else { // alpha != zero
1373 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(), false);
1374 apply (X, *Y_tmp_, mode);
1375 Y.update (alpha, *Y_tmp_, beta);
1376 }
1377 }
1378 }//end timing
1379
1380#ifdef HAVE_IFPACK2_DEBUG
1381 {
1382 Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
1383 Y.norm1 (norms ());
1384 bool good = true;
1385 for (size_t j = 0; j < Y.getNumVectors (); ++j) {
1386 if (STM::isnaninf (norms[j])) {
1387 good = false;
1388 break;
1389 }
1390 }
1391 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1392 }
1393#endif // HAVE_IFPACK2_DEBUG
1394
1395 ++numApply_;
1396 applyTime_ += (timer.wallTime() - startTime);
1397}
1398
1399
1400//VINH comment out since multiply() is not needed anywhere
1401//template<class MatrixType>
1402//void RILUK<MatrixType>::
1403//multiply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1404// Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1405// const Teuchos::ETransp mode) const
1406//{
1407// const scalar_type zero = STS::zero ();
1408// const scalar_type one = STS::one ();
1409//
1410// if (mode != Teuchos::NO_TRANS) {
1411// U_->apply (X, Y, mode); //
1412// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1413//
1414// // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we need
1415// // to do an elementwise multiply with the conjugate of D_, not
1416// // just with D_ itself.
1417// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1418//
1419// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y
1420// L_->apply (Y_tmp, Y, mode);
1421// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1422// }
1423// else {
1424// L_->apply (X, Y, mode);
1425// Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1426// Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1427// MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y1
1428// U_->apply (Y_tmp, Y, mode);
1429// Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1430// }
1431//}
1432
1433template<class MatrixType>
1435{
1436 std::ostringstream os;
1437
1438 // Output is a valid YAML dictionary in flow style. If you don't
1439 // like everything on a single line, you should call describe()
1440 // instead.
1441 os << "\"Ifpack2::RILUK\": {";
1442 os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1443 << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1444
1445 os << "Level-of-fill: " << getLevelOfFill() << ", ";
1446
1447 if(isKokkosKernelsSpiluk_) os<<"KK-SPILUK, ";
1448 if(isKokkosKernelsStream_) os<<"KK-Stream, ";
1449
1450 if (A_.is_null ()) {
1451 os << "Matrix: null";
1452 }
1453 else {
1454 os << "Global matrix dimensions: ["
1455 << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
1456 << ", Global nnz: " << A_->getGlobalNumEntries();
1457 }
1458
1459 if (! L_solver_.is_null ()) os << ", " << L_solver_->description ();
1460 if (! U_solver_.is_null ()) os << ", " << U_solver_->description ();
1461
1462 os << "}";
1463 return os.str ();
1464}
1465
1466} // namespace Ifpack2
1467
1468#define IFPACK2_RILUK_INSTANT(S,LO,GO,N) \
1469 template class Ifpack2::RILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
1470
1471#endif
Declaration of MDF interface.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition Ifpack2_IlukGraph.hpp:67
"Preconditioner" that solves local sparse triangular systems.
Definition Ifpack2_LocalSparseTriangularSolver_decl.hpp:55
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition Ifpack2_RILUK_decl.hpp:222
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:481
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition Ifpack2_RILUK_decl.hpp:258
virtual ~RILUK()
Destructor (declared virtual for memory safety).
Definition Ifpack2_RILUK_def.hpp:98
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition Ifpack2_RILUK_def.hpp:1183
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_RILUK_decl.hpp:237
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition Ifpack2_RILUK_def.hpp:241
Teuchos::RCP< const row_matrix_type > A_
The (original) input matrix for which to compute ILU(k).
Definition Ifpack2_RILUK_decl.hpp:572
Teuchos::RCP< crs_matrix_type > U_
The U (upper triangular) factor of ILU(k).
Definition Ifpack2_RILUK_decl.hpp:594
const crs_matrix_type & getU() const
Return the U factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:192
std::string description() const
A one-line description of this object.
Definition Ifpack2_RILUK_def.hpp:1434
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_RILUK_def.hpp:125
bool isComputed() const
Whether compute() has been called on this object.
Definition Ifpack2_RILUK_decl.hpp:342
const Tpetra::Vector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > & getD() const
Return the diagonal entries of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:178
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:234
Teuchos::RCP< const row_matrix_type > A_local_
The matrix whos numbers are used to to compute ILU(k). The graph may be computed using a crs_matrix_t...
Definition Ifpack2_RILUK_decl.hpp:580
Teuchos::RCP< const row_matrix_type > getMatrix() const
Get the input matrix.
Definition Ifpack2_RILUK_def.hpp:434
Teuchos::RCP< LocalSparseTriangularSolver< row_matrix_type > > L_solver_
Sparse triangular solver for L.
Definition Ifpack2_RILUK_decl.hpp:592
int getLevelOfFill() const
Get level of fill (the "k" in ILU(k)).
Definition Ifpack2_RILUK_decl.hpp:504
Teuchos::RCP< const crs_matrix_type > getCrsMatrix() const
Return the input matrix A as a Tpetra::CrsMatrix, if possible; else throws.
Definition Ifpack2_RILUK_def.hpp:441
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition Ifpack2_RILUK_def.hpp:222
void setParameters(const Teuchos::ParameterList &params)
Definition Ifpack2_RILUK_def.hpp:306
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:228
Teuchos::RCP< crs_matrix_type > L_
The L (lower triangular) factor of ILU(k).
Definition Ifpack2_RILUK_decl.hpp:589
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_RILUK_decl.hpp:231
const crs_matrix_type & getL() const
Return the L factor of the ILU factorization.
Definition Ifpack2_RILUK_def.hpp:161
Teuchos::RCP< LocalSparseTriangularSolver< row_matrix_type > > U_solver_
Sparse triangular solver for U.
Definition Ifpack2_RILUK_decl.hpp:597
Teuchos::RCP< iluk_graph_type > Graph_
The ILU(k) graph.
Definition Ifpack2_RILUK_decl.hpp:575
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition Ifpack2_RILUK_def.hpp:205
void compute()
Compute the (numeric) incomplete factorization.
Definition Ifpack2_RILUK_def.hpp:1099
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Return A, wrapped in a LocalFilter, if necessary.
Definition Ifpack2_RILUK_def.hpp:448
Teuchos::RCP< vec_type > D_
The diagonal entries of the ILU(k) factorization.
Definition Ifpack2_RILUK_decl.hpp:600
bool isKokkosKernelsSpiluk_
Optional KokkosKernels implementation.
Definition Ifpack2_RILUK_decl.hpp:622
bool isInitialized() const
Whether initialize() has been called on this object.
Definition Ifpack2_RILUK_decl.hpp:338
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:41