Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Experimental_RBILUK_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
11#define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
12
13#include "Tpetra_BlockMultiVector.hpp"
14#include "Tpetra_BlockView.hpp"
15#include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
16#include "Ifpack2_OverlappingRowMatrix.hpp"
17#include "Ifpack2_Details_getCrsMatrix.hpp"
18#include "Ifpack2_LocalFilter.hpp"
19#include "Ifpack2_Utilities.hpp"
20#include "Ifpack2_RILUK.hpp"
21#include "KokkosSparse_sptrsv.hpp"
22
23//#define IFPACK2_RBILUK_INITIAL
24//#define IFPACK2_RBILUK_INITIAL_NOKK
25
26#ifndef IFPACK2_RBILUK_INITIAL_NOKK
27#include "KokkosBatched_Gemm_Decl.hpp"
28#include "KokkosBatched_Gemm_Serial_Impl.hpp"
29#include "KokkosBatched_Util.hpp"
30#endif
31
32namespace Ifpack2 {
33
34namespace Experimental {
35
36namespace
37{
38template<class MatrixType>
39struct LocalRowHandler
40{
41 using LocalOrdinal = typename MatrixType::local_ordinal_type;
42 using row_matrix_type = Tpetra::RowMatrix<
43 typename MatrixType::scalar_type,
44 LocalOrdinal,
45 typename MatrixType::global_ordinal_type,
46 typename MatrixType::node_type>;
47 using inds_type = typename row_matrix_type::local_inds_host_view_type;
48 using vals_type = typename row_matrix_type::values_host_view_type;
49
50 LocalRowHandler(Teuchos::RCP<const row_matrix_type> A)
51 : A_(std::move(A))
52 {
53 if (!A_->supportsRowViews())
54 {
55 const auto maxNumRowEntr = A_->getLocalMaxNumRowEntries();
56 const auto blockSize = A_->getBlockSize();
57 ind_nc_ = inds_type_nc("Ifpack2::RBILUK::LocalRowHandler::indices",maxNumRowEntr);
58 val_nc_ = vals_type_nc("Ifpack2::RBILUK::LocalRowHandler::values",maxNumRowEntr*blockSize*blockSize);
59 }
60 }
61
62 void getLocalRow(LocalOrdinal local_row, inds_type & InI, vals_type & InV, LocalOrdinal & NumIn)
63 {
64 if (A_->supportsRowViews())
65 {
66 A_->getLocalRowView(local_row,InI,InV);
67 NumIn = (LocalOrdinal)InI.size();
68 }
69 else
70 {
71 size_t cnt;
72 A_->getLocalRowCopy(local_row,ind_nc_,val_nc_,cnt);
73 InI = ind_nc_;
74 InV = val_nc_;
75 NumIn = (LocalOrdinal)cnt;
76 }
77 }
78
79private:
80
81 using inds_type_nc = typename row_matrix_type::nonconst_local_inds_host_view_type;
82 using vals_type_nc = typename row_matrix_type::nonconst_values_host_view_type;
83
84 Teuchos::RCP<const row_matrix_type> A_;
85 inds_type_nc ind_nc_;
86 vals_type_nc val_nc_;
87};
88
89} // namespace
90
91template<class MatrixType>
92RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
93 : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) )
94{}
95
96template<class MatrixType>
97RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
98 : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) )
99{}
100
101
102template<class MatrixType>
104
105
106template<class MatrixType>
107void
108RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
109{
110 // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
111
112 // It's legal for A to be null; in that case, you may not call
113 // initialize() until calling setMatrix() with a nonnull input.
114 // Regardless, setting the matrix invalidates any previous
115 // factorization.
116 if (A.getRawPtr () != this->A_.getRawPtr ())
117 {
118 this->isAllocated_ = false;
119 this->isInitialized_ = false;
120 this->isComputed_ = false;
121 this->Graph_ = Teuchos::null;
122 L_block_ = Teuchos::null;
123 U_block_ = Teuchos::null;
124 D_block_ = Teuchos::null;
125 }
126}
127
128
129
130template<class MatrixType>
131const typename RBILUK<MatrixType>::block_crs_matrix_type&
133{
134 TEUCHOS_TEST_FOR_EXCEPTION(
135 L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
136 "is null. Please call initialize() (and preferably also compute()) "
137 "before calling this method. If the input matrix has not yet been set, "
138 "you must first call setMatrix() with a nonnull input matrix before you "
139 "may call initialize() or compute().");
140 return *L_block_;
141}
142
143
144template<class MatrixType>
145const typename RBILUK<MatrixType>::block_crs_matrix_type&
147{
148 TEUCHOS_TEST_FOR_EXCEPTION(
149 D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
150 "(of diagonal entries) is null. Please call initialize() (and "
151 "preferably also compute()) before calling this method. If the input "
152 "matrix has not yet been set, you must first call setMatrix() with a "
153 "nonnull input matrix before you may call initialize() or compute().");
154 return *D_block_;
155}
156
157
158template<class MatrixType>
159const typename RBILUK<MatrixType>::block_crs_matrix_type&
161{
162 TEUCHOS_TEST_FOR_EXCEPTION(
163 U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
164 "is null. Please call initialize() (and preferably also compute()) "
165 "before calling this method. If the input matrix has not yet been set, "
166 "you must first call setMatrix() with a nonnull input matrix before you "
167 "may call initialize() or compute().");
168 return *U_block_;
169}
170
171template<class MatrixType>
172void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
173{
174 using Teuchos::null;
175 using Teuchos::rcp;
176
177 if (! this->isAllocated_) {
178 // Deallocate any existing storage. This avoids storing 2x
179 // memory, since RCP op= does not deallocate until after the
180 // assignment.
181 this->L_ = null;
182 this->U_ = null;
183 this->D_ = null;
184 L_block_ = null;
185 U_block_ = null;
186 D_block_ = null;
187
188 // Allocate Matrix using ILUK graphs
189 L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
190 U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
191 D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
192 blockSize_) );
193 L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
194 U_block_->setAllToScalar (STM::zero ());
195 D_block_->setAllToScalar (STM::zero ());
196
197 // Allocate temp space for apply
198 if (this->isKokkosKernelsSpiluk_) {
199 const auto numRows = L_block_->getLocalNumRows();
200 tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_);
201 }
202 }
203 this->isAllocated_ = true;
204}
205
206namespace
207{
208
209template<class MatrixType>
210Teuchos::RCP<const typename RBILUK<MatrixType>::crs_graph_type>
211getBlockCrsGraph(const Teuchos::RCP<const typename RBILUK<MatrixType>::row_matrix_type>& A)
212{
213 using local_ordinal_type = typename MatrixType::local_ordinal_type;
214 using Teuchos::RCP;
215 using Teuchos::rcp;
216 using Teuchos::rcp_dynamic_cast;
217 using Teuchos::rcp_const_cast;
218 using Teuchos::rcpFromRef;
219 using row_matrix_type = typename RBILUK<MatrixType>::row_matrix_type;
220 using crs_graph_type = typename RBILUK<MatrixType>::crs_graph_type;
221 using block_crs_matrix_type = typename RBILUK<MatrixType>::block_crs_matrix_type;
222
223 auto A_local = RBILUK<MatrixType>::makeLocalFilter(A);
224
225 {
226 RCP<const LocalFilter<row_matrix_type> > filteredA =
227 rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_local);
228 RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = Teuchos::null;
229 RCP<const block_crs_matrix_type > A_block = Teuchos::null;
230 if (!filteredA.is_null ())
231 {
232 overlappedA = rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
233 }
234
235 if (! overlappedA.is_null ()) {
236 A_block = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
237 }
238 else if (!filteredA.is_null ()){
239 //If there is no overlap, filteredA could be the block CRS matrix
240 A_block = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
241 }
242 else
243 {
244 A_block = rcp_dynamic_cast<const block_crs_matrix_type>(A_local);
245 }
246
247 if (!A_block.is_null()){
248 return rcpFromRef(A_block->getCrsGraph());
249 }
250 }
251
252 // Could not extract block crs, make graph manually
253 {
254 local_ordinal_type numRows = A_local->getLocalNumRows();
255 Teuchos::Array<size_t> entriesPerRow(numRows);
256 for(local_ordinal_type i = 0; i < numRows; i++) {
257 entriesPerRow[i] = A_local->getNumEntriesInLocalRow(i);
258 }
259 RCP<crs_graph_type> A_local_crs_nc =
260 rcp (new crs_graph_type (A_local->getRowMap (),
261 A_local->getColMap (),
262 entriesPerRow()));
263
264 {
265 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
266 LocalRowHandler_t localRowHandler(A_local);
267 typename LocalRowHandler_t::inds_type indices;
268 typename LocalRowHandler_t::vals_type values;
269 for(local_ordinal_type i = 0; i < numRows; i++) {
270 local_ordinal_type numEntries = 0;
271 localRowHandler.getLocalRow(i, indices, values, numEntries);
272 A_local_crs_nc->insertLocalIndices(i, numEntries,indices.data());
273 }
274 }
275
276 A_local_crs_nc->fillComplete (A_local->getDomainMap (), A_local->getRangeMap ());
277 return rcp_const_cast<const crs_graph_type> (A_local_crs_nc);
278 }
279
280}
281
282
283} // namespace
284
285template<class MatrixType>
287{
288 using Teuchos::RCP;
289 using Teuchos::rcp;
290 using Teuchos::rcp_dynamic_cast;
291 const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
292
293 TEUCHOS_TEST_FOR_EXCEPTION
294 (this->A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
295 "RowMatrix) is null. Please call setMatrix() with a nonnull input "
296 "before calling this method.");
297 TEUCHOS_TEST_FOR_EXCEPTION
298 (! this->A_->isFillComplete (), std::runtime_error, prefix << "The matrix "
299 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
300 "initialize() or compute() with this matrix until the matrix is fill "
301 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
302 "underlying graph is fill complete.");
303
304 blockSize_ = this->A_->getBlockSize();
305 this->A_local_ = this->makeLocalFilter(this->A_);
306
307 Teuchos::Time timer ("RBILUK::initialize");
308 double startTime = timer.wallTime();
309 { // Start timing
310 Teuchos::TimeMonitor timeMon (timer);
311
312 // Calling initialize() means that the user asserts that the graph
313 // of the sparse matrix may have changed. We must not just reuse
314 // the previous graph in that case.
315 //
316 // Regarding setting isAllocated_ to false: Eventually, we may want
317 // some kind of clever memory reuse strategy, but it's always
318 // correct just to blow everything away and start over.
319 this->isInitialized_ = false;
320 this->isAllocated_ = false;
321 this->isComputed_ = false;
322 this->Graph_ = Teuchos::null;
323
324 RCP<const crs_graph_type> matrixCrsGraph = getBlockCrsGraph<MatrixType>(this->A_);
325 this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (matrixCrsGraph,
326 this->LevelOfFill_, 0));
327
328 if (this->isKokkosKernelsSpiluk_) {
329 this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
330 const auto numRows = this->A_local_->getLocalNumRows();
331 KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
332 numRows,
333 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
334 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
335 blockSize_);
336 this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic
337
338 this->L_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
339 this->U_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
340
341 KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;
342
343 this->L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
344 this->U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
345 }
346 else {
347 this->Graph_->initialize ();
348 }
349
350 allocate_L_and_U_blocks ();
351
352#ifdef IFPACK2_RBILUK_INITIAL
353 initAllValues ();
354#endif
355 } // Stop timing
356
357 this->isInitialized_ = true;
358 this->numInitialize_ += 1;
359 this->initializeTime_ += (timer.wallTime() - startTime);
360}
361
362
363template<class MatrixType>
364void
367{
368 using Teuchos::RCP;
369 typedef Tpetra::Map<LO,GO,node_type> map_type;
370
371 LO NumIn = 0, NumL = 0, NumU = 0;
372 bool DiagFound = false;
373 size_t NumNonzeroDiags = 0;
374 size_t MaxNumEntries = this->A_->getLocalMaxNumRowEntries();
375 LO blockMatSize = blockSize_*blockSize_;
376
377 // First check that the local row map ordering is the same as the local portion of the column map.
378 // The extraction of the strictly lower/upper parts of A, as well as the factorization,
379 // implicitly assume that this is the case.
380 Teuchos::ArrayView<const GO> rowGIDs = this->A_->getRowMap()->getLocalElementList();
381 Teuchos::ArrayView<const GO> colGIDs = this->A_->getColMap()->getLocalElementList();
382 bool gidsAreConsistentlyOrdered=true;
383 GO indexOfInconsistentGID=0;
384 for (GO i=0; i<rowGIDs.size(); ++i) {
385 if (rowGIDs[i] != colGIDs[i]) {
386 gidsAreConsistentlyOrdered=false;
387 indexOfInconsistentGID=i;
388 break;
389 }
390 }
391 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
392 "The ordering of the local GIDs in the row and column maps is not the same"
393 << std::endl << "at index " << indexOfInconsistentGID
394 << ". Consistency is required, as all calculations are done with"
395 << std::endl << "local indexing.");
396
397 // Allocate temporary space for extracting the strictly
398 // lower and upper parts of the matrix A.
399 Teuchos::Array<LO> LI(MaxNumEntries);
400 Teuchos::Array<LO> UI(MaxNumEntries);
401 Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
402 Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
403
404 Teuchos::Array<scalar_type> diagValues(blockMatSize);
405
406 L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
407 U_block_->setAllToScalar (STM::zero ());
408 D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
409
410 // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
411 // host, so sync to host first. The const_cast is unfortunate but
412 // is our only option to make this correct.
413
414 /*
415 const_cast<block_crs_matrix_type&> (A).sync_host ();
416 L_block_->sync_host ();
417 U_block_->sync_host ();
418 D_block_->sync_host ();
419 // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
420 L_block_->modify_host ();
421 U_block_->modify_host ();
422 D_block_->modify_host ();
423 */
424
425 RCP<const map_type> rowMap = L_block_->getRowMap ();
426
427 // First we copy the user's matrix into L and U, regardless of fill level.
428 // It is important to note that L and U are populated using local indices.
429 // This means that if the row map GIDs are not monotonically increasing
430 // (i.e., permuted or gappy), then the strictly lower (upper) part of the
431 // matrix is not the one that you would get if you based L (U) on GIDs.
432 // This is ok, as the *order* of the GIDs in the rowmap is a better
433 // expression of the user's intent than the GIDs themselves.
434
435 //TODO BMK: Revisit this fence when BlockCrsMatrix is refactored.
436 Kokkos::fence();
437 using LocalRowHandler_t = LocalRowHandler<MatrixType>;
438 LocalRowHandler_t localRowHandler(this->A_);
439 typename LocalRowHandler_t::inds_type InI;
440 typename LocalRowHandler_t::vals_type InV;
441 for (size_t myRow=0; myRow<this->A_->getLocalNumRows(); ++myRow) {
442 LO local_row = myRow;
443
444 localRowHandler.getLocalRow(local_row, InI, InV, NumIn);
445
446 // Split into L and U (we don't assume that indices are ordered).
447 NumL = 0;
448 NumU = 0;
449 DiagFound = false;
450
451 for (LO j = 0; j < NumIn; ++j) {
452 const LO k = InI[j];
453 const LO blockOffset = blockMatSize*j;
454
455 if (k == local_row) {
456 DiagFound = true;
457 // Store perturbed diagonal in Tpetra::Vector D_
458 for (LO jj = 0; jj < blockMatSize; ++jj)
459 diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
460 D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
461 }
462 else if (k < 0) { // Out of range
463 TEUCHOS_TEST_FOR_EXCEPTION(
464 true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
465 "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
466 "probably an artifact of the undocumented assumptions of the "
467 "original implementation (likely copied and pasted from Ifpack). "
468 "Nevertheless, the code I found here insisted on this being an error "
469 "state, so I will throw an exception here.");
470 }
471 else if (k < local_row) {
472 LI[NumL] = k;
473 const LO LBlockOffset = NumL*blockMatSize;
474 for (LO jj = 0; jj < blockMatSize; ++jj)
475 LV[LBlockOffset+jj] = InV[blockOffset+jj];
476 NumL++;
477 }
478 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
479 UI[NumU] = k;
480 const LO UBlockOffset = NumU*blockMatSize;
481 for (LO jj = 0; jj < blockMatSize; ++jj)
482 UV[UBlockOffset+jj] = InV[blockOffset+jj];
483 NumU++;
484 }
485 }
486
487 // Check in things for this row of L and U
488
489 if (DiagFound) {
490 ++NumNonzeroDiags;
491 } else
492 {
493 for (LO jj = 0; jj < blockSize_; ++jj)
494 diagValues[jj*(blockSize_+1)] = this->Athresh_;
495 D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
496 }
497
498 if (NumL) {
499 L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
500 }
501
502 if (NumU) {
503 U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
504 }
505 }
506
507 // NOTE (mfh 27 May 2016) Sync back to device, in case compute()
508 // ever gets a device implementation.
509 /*
510 {
511 typedef typename block_crs_matrix_type::device_type device_type;
512 const_cast<block_crs_matrix_type&> (A).template sync<device_type> ();
513 L_block_->template sync<device_type> ();
514 U_block_->template sync<device_type> ();
515 D_block_->template sync<device_type> ();
516 }
517 */
518 this->isInitialized_ = true;
519}
520
521namespace { // (anonymous)
522
523// For a given Kokkos::View type, possibly unmanaged, get the
524// corresponding managed Kokkos::View type. This is handy for
525// translating from little_block_type or little_host_vec_type (both
526// possibly unmanaged) to their managed versions.
527template<class LittleBlockType>
528struct GetManagedView {
529 static_assert (Kokkos::is_view<LittleBlockType>::value,
530 "LittleBlockType must be a Kokkos::View.");
531 typedef Kokkos::View<typename LittleBlockType::non_const_data_type,
532 typename LittleBlockType::array_layout,
533 typename LittleBlockType::device_type> managed_non_const_type;
534 static_assert (static_cast<int> (managed_non_const_type::rank) ==
535 static_cast<int> (LittleBlockType::rank),
536 "managed_non_const_type::rank != LittleBlock::rank. "
537 "Please report this bug to the Ifpack2 developers.");
538};
539
540} // namespace (anonymous)
541
542template<class MatrixType>
544{
545 using Teuchos::RCP;
546 using Teuchos::rcp;
547 using Teuchos::Array;
548
549 typedef impl_scalar_type IST;
550 const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
551
552 // initialize() checks this too, but it's easier for users if the
553 // error shows them the name of the method that they actually
554 // called, rather than the name of some internally called method.
555 TEUCHOS_TEST_FOR_EXCEPTION
556 (this->A_.is_null (), std::runtime_error, prefix << "The matrix (A_, "
557 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
558 "input before calling this method.");
559 TEUCHOS_TEST_FOR_EXCEPTION
560 (! this->A_->isFillComplete (), std::runtime_error, prefix << "The matrix "
561 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
562 "initialize() or compute() with this matrix until the matrix is fill "
563 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
564 "underlying graph is fill complete.");
565
566 if (! this->isInitialized ()) {
567 initialize (); // Don't count this in the compute() time
568 }
569
570 // // NOTE (mfh 27 May 2016) The factorization below occurs entirely on
571 // // host, so sync to host first. The const_cast is unfortunate but
572 // // is our only option to make this correct.
573 // if (! A_block_.is_null ()) {
574 // Teuchos::RCP<block_crs_matrix_type> A_nc =
575 // Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
576 // // A_nc->sync_host ();
577 // }
578 /*
579 L_block_->sync_host ();
580 U_block_->sync_host ();
581 D_block_->sync_host ();
582 // NOTE (mfh 27 May 2016) We're modifying L, U, and D on host.
583 L_block_->modify_host ();
584 U_block_->modify_host ();
585 D_block_->modify_host ();
586 */
587
588 Teuchos::Time timer ("RBILUK::compute");
589 double startTime = timer.wallTime();
590 { // Start timing
591 Teuchos::TimeMonitor timeMon (timer);
592 this->isComputed_ = false;
593
594 // MinMachNum should be officially defined, for now pick something a little
595 // bigger than IEEE underflow value
596
597// const scalar_type MinDiagonalValue = STS::rmin ();
598// const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
599 if (!this->isKokkosKernelsSpiluk_) {
600 initAllValues ();
601 size_t NumIn;
602 LO NumL, NumU, NumURead;
603
604 // Get Maximum Row length
605 const size_t MaxNumEntries =
606 L_block_->getLocalMaxNumRowEntries () + U_block_->getLocalMaxNumRowEntries () + 1;
607
608 const LO blockMatSize = blockSize_*blockSize_;
609
610 // FIXME (mfh 08 Nov 2015, 24 May 2016) We need to move away from
611 // expressing these strides explicitly, in order to finish #177
612 // (complete Kokkos-ization of BlockCrsMatrix) thoroughly.
613 const LO rowStride = blockSize_;
614
615 Teuchos::Array<int> ipiv_teuchos(blockSize_);
616 Kokkos::View<int*, Kokkos::HostSpace,
617 Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
618 Teuchos::Array<IST> work_teuchos(blockSize_);
619 Kokkos::View<IST*, Kokkos::HostSpace,
620 Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
621
622 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
623 Teuchos::Array<int> colflag(num_cols);
624
625 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock ("diagModBlock", blockSize_, blockSize_);
626 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp ("matTmp", blockSize_, blockSize_);
627 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier ("multiplier", blockSize_, blockSize_);
628
629// Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
630
631 // Now start the factorization.
632
633 // Need some integer workspace and pointers
634 LO NumUU;
635 for (size_t j = 0; j < num_cols; ++j) {
636 colflag[j] = -1;
637 }
638 Teuchos::Array<LO> InI(MaxNumEntries, 0);
639 Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
640
641 const LO numLocalRows = L_block_->getLocalNumRows ();
642 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
643
644 // Fill InV, InI with current row of L, D and U combined
645
646 NumIn = MaxNumEntries;
647 local_inds_host_view_type colValsL;
648 values_host_view_type valsL;
649
650 L_block_->getLocalRowView(local_row, colValsL, valsL);
651 NumL = (LO) colValsL.size();
652 for (LO j = 0; j < NumL; ++j)
653 {
654 const LO matOffset = blockMatSize*j;
655 little_block_host_type lmat((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
656 little_block_host_type lmatV((typename little_block_host_type::value_type*) &InV[matOffset],blockSize_,rowStride);
657 //lmatV.assign(lmat);
658 Tpetra::COPY (lmat, lmatV);
659 InI[j] = colValsL[j];
660 }
661
662 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
663 little_block_host_type dmatV((typename little_block_host_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
664 //dmatV.assign(dmat);
665 Tpetra::COPY (dmat, dmatV);
666 InI[NumL] = local_row;
667
668 local_inds_host_view_type colValsU;
669 values_host_view_type valsU;
670 U_block_->getLocalRowView(local_row, colValsU, valsU);
671 NumURead = (LO) colValsU.size();
672 NumU = 0;
673 for (LO j = 0; j < NumURead; ++j)
674 {
675 if (!(colValsU[j] < numLocalRows)) continue;
676 InI[NumL+1+j] = colValsU[j];
677 const LO matOffset = blockMatSize*(NumL+1+j);
678 little_block_host_type umat((typename little_block_host_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
679 little_block_host_type umatV((typename little_block_host_type::value_type*) &InV[matOffset], blockSize_, rowStride);
680 //umatV.assign(umat);
681 Tpetra::COPY (umat, umatV);
682 NumU += 1;
683 }
684 NumIn = NumL+NumU+1;
685
686 // Set column flags
687 for (size_t j = 0; j < NumIn; ++j) {
688 colflag[InI[j]] = j;
689 }
690
691#ifndef IFPACK2_RBILUK_INITIAL
692 for (LO i = 0; i < blockSize_; ++i)
693 for (LO j = 0; j < blockSize_; ++j){
694 {
695 diagModBlock(i,j) = 0;
696 }
697 }
698#else
699 scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
700 Kokkos::deep_copy (diagModBlock, diagmod);
701#endif
702
703 for (LO jj = 0; jj < NumL; ++jj) {
704 LO j = InI[jj];
705 little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride); // current_mults++;
706 //multiplier.assign(currentVal);
707 Tpetra::COPY (currentVal, multiplier);
708
709 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j,j);
710 // alpha = 1, beta = 0
711#ifndef IFPACK2_RBILUK_INITIAL_NOKK
712 KokkosBatched::SerialGemm
713 <KokkosBatched::Trans::NoTranspose,
714 KokkosBatched::Trans::NoTranspose,
715 KokkosBatched::Algo::Gemm::Blocked>::invoke
716 (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
717#else
718 Tpetra::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
719 STS::zero (), matTmp);
720#endif
721 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.data ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.data ()), reinterpret_cast<impl_scalar_type*> (matTmp.data ()), blockSize_);
722 //currentVal.assign(matTmp);
723 Tpetra::COPY (matTmp, currentVal);
724 local_inds_host_view_type UUI;
725 values_host_view_type UUV;
726
727 U_block_->getLocalRowView(j, UUI, UUV);
728 NumUU = (LO) UUI.size();
729
730 if (this->RelaxValue_ == STM::zero ()) {
731 for (LO k = 0; k < NumUU; ++k) {
732 if (!(UUI[k] < numLocalRows)) continue;
733 const int kk = colflag[UUI[k]];
734 if (kk > -1) {
735 little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
736 little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
737#ifndef IFPACK2_RBILUK_INITIAL_NOKK
738 KokkosBatched::SerialGemm
739 <KokkosBatched::Trans::NoTranspose,
740 KokkosBatched::Trans::NoTranspose,
741 KokkosBatched::Algo::Gemm::Blocked>::invoke
742 ( magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
743#else
744 Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
745 STM::one (), kkval);
746#endif
747 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.data ()), reinterpret_cast<impl_scalar_type*> (uumat.data ()), reinterpret_cast<impl_scalar_type*> (kkval.data ()), blockSize_, -STM::one(), STM::one());
748 }
749 }
750 }
751 else {
752 for (LO k = 0; k < NumUU; ++k) {
753 if (!(UUI[k] < numLocalRows)) continue;
754 const int kk = colflag[UUI[k]];
755 little_block_host_type uumat((typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
756 if (kk > -1) {
757 little_block_host_type kkval((typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
758#ifndef IFPACK2_RBILUK_INITIAL_NOKK
759 KokkosBatched::SerialGemm
760 <KokkosBatched::Trans::NoTranspose,
761 KokkosBatched::Trans::NoTranspose,
762 KokkosBatched::Algo::Gemm::Blocked>::invoke
763 (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
764#else
765 Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
766 STM::one (), kkval);
767#endif
768 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(kkval.data ()), blockSize_, -STM::one(), STM::one());
769 }
770 else {
771#ifndef IFPACK2_RBILUK_INITIAL_NOKK
772 KokkosBatched::SerialGemm
773 <KokkosBatched::Trans::NoTranspose,
774 KokkosBatched::Trans::NoTranspose,
775 KokkosBatched::Algo::Gemm::Blocked>::invoke
776 (magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
777#else
778 Tpetra::GEMM ("N", "N", magnitude_type(-STM::one ()), multiplier, uumat,
779 STM::one (), diagModBlock);
780#endif
781 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.data ()), reinterpret_cast<impl_scalar_type*>(uumat.data ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.data ()), blockSize_, -STM::one(), STM::one());
782 }
783 }
784 }
785 }
786 if (NumL) {
787 // Replace current row of L
788 L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
789 }
790
791 // dmat.assign(dmatV);
792 Tpetra::COPY (dmatV, dmat);
793
794 if (this->RelaxValue_ != STM::zero ()) {
795 //dmat.update(this->RelaxValue_, diagModBlock);
796 Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
797 }
798
799// if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
800// if (STS::real (DV[i]) < STM::zero ()) {
801// DV[i] = -MinDiagonalValue;
802// }
803// else {
804// DV[i] = MinDiagonalValue;
805// }
806// }
807// else
808 {
809 int lapackInfo = 0;
810 for (int k = 0; k < blockSize_; ++k) {
811 ipiv[k] = 0;
812 }
813
814 Tpetra::GETF2 (dmat, ipiv, lapackInfo);
815 //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
816 TEUCHOS_TEST_FOR_EXCEPTION(
817 lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
818 "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
819
820 Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
821 //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
822 TEUCHOS_TEST_FOR_EXCEPTION(
823 lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
824 "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
825 }
826
827 for (LO j = 0; j < NumU; ++j) {
828 little_block_host_type currentVal((typename little_block_host_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride); // current_mults++;
829 // scale U by the diagonal inverse
830#ifndef IFPACK2_RBILUK_INITIAL_NOKK
831 KokkosBatched::SerialGemm
832 <KokkosBatched::Trans::NoTranspose,
833 KokkosBatched::Trans::NoTranspose,
834 KokkosBatched::Algo::Gemm::Blocked>::invoke
835 (STS::one (), dmat, currentVal, STS::zero (), matTmp);
836#else
837 Tpetra::GEMM ("N", "N", STS::one (), dmat, currentVal,
838 STS::zero (), matTmp);
839#endif
840 //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.data ()), reinterpret_cast<impl_scalar_type*>(currentVal.data ()), reinterpret_cast<impl_scalar_type*>(matTmp.data ()), blockSize_);
841 //currentVal.assign(matTmp);
842 Tpetra::COPY (matTmp, currentVal);
843 }
844
845 if (NumU) {
846 // Replace current row of L and U
847 U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
848 }
849
850#ifndef IFPACK2_RBILUK_INITIAL
851 // Reset column flags
852 for (size_t j = 0; j < NumIn; ++j) {
853 colflag[InI[j]] = -1;
854 }
855#else
856 // Reset column flags
857 for (size_t j = 0; j < num_cols; ++j) {
858 colflag[j] = -1;
859 }
860#endif
861 }
862 } // !this->isKokkosKernelsSpiluk_
863 else {
864 RCP<const block_crs_matrix_type> A_local_bcrs = Details::getBcrsMatrix(this->A_local_);
865 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
866 if (A_local_bcrs.is_null()) {
867 if (A_local_crs.is_null()) {
868 local_ordinal_type numRows = this->A_local_->getLocalNumRows();
869 Array<size_t> entriesPerRow(numRows);
870 for(local_ordinal_type i = 0; i < numRows; i++) {
871 entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
872 }
873 RCP<crs_matrix_type> A_local_crs_nc =
874 rcp (new crs_matrix_type (this->A_local_->getRowMap (),
875 this->A_local_->getColMap (),
876 entriesPerRow()));
877 // copy entries into A_local_crs
878 nonconst_local_inds_host_view_type indices("indices",this->A_local_->getLocalMaxNumRowEntries());
879 nonconst_values_host_view_type values("values",this->A_local_->getLocalMaxNumRowEntries());
880 for(local_ordinal_type i = 0; i < numRows; i++) {
881 size_t numEntries = 0;
882 this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
883 A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
884 }
885 A_local_crs_nc->fillComplete (this->A_local_->getDomainMap (), this->A_local_->getRangeMap ());
886 A_local_crs = Teuchos::rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
887 }
888 // Create bcrs from crs
889 // We can skip fillLogicalBlocks if we know the input is already filled
890 if (blockSize_ > 1) {
891 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
892 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
893 }
894 else {
895 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
896 }
897 }
898
899 TEUCHOS_TEST_FOR_EXCEPTION(
900 this->isKokkosKernelsStream_, std::runtime_error, "Ifpack2::RBILUK::compute: "
901 "streams are not yet supported.");
902
903 auto lclMtx = A_local_bcrs->getLocalMatrixDevice();
904 auto A_local_rowmap = lclMtx.graph.row_map;
905 auto A_local_entries = lclMtx.graph.entries;
906 auto A_local_values = lclMtx.values;
907
908 // L_block_->resumeFill ();
909 // U_block_->resumeFill ();
910
911 if (L_block_->isLocallyIndexed ()) {
912 L_block_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
913 U_block_->setAllToScalar (STS::zero ());
914 }
915
916 using row_map_type = typename local_matrix_device_type::row_map_type;
917
918 auto lclL = L_block_->getLocalMatrixDevice();
919 row_map_type L_rowmap = lclL.graph.row_map;
920 auto L_entries = lclL.graph.entries;
921 auto L_values = lclL.values;
922
923 auto lclU = U_block_->getLocalMatrixDevice();
924 row_map_type U_rowmap = lclU.graph.row_map;
925 auto U_entries = lclU.graph.entries;
926 auto U_values = lclU.values;
927
928 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_,
929 A_local_rowmap, A_local_entries, A_local_values,
930 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
931
932 // Now call symbolic for sptrsvs
933 KokkosSparse::Experimental::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
934 KokkosSparse::Experimental::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
935 }
936 } // Stop timing
937
938 // Sync everything back to device, for efficient solves.
939 /*
940 {
941 typedef typename block_crs_matrix_type::device_type device_type;
942 if (! A_block_.is_null ()) {
943 Teuchos::RCP<block_crs_matrix_type> A_nc =
944 Teuchos::rcp_const_cast<block_crs_matrix_type> (A_block_);
945 A_nc->template sync<device_type> ();
946 }
947 L_block_->template sync<device_type> ();
948 U_block_->template sync<device_type> ();
949 D_block_->template sync<device_type> ();
950 }
951 */
952
953 this->isComputed_ = true;
954 this->numCompute_ += 1;
955 this->computeTime_ += (timer.wallTime() - startTime);
956}
957
958
959template<class MatrixType>
960void
962apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
963 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
964 Teuchos::ETransp mode,
965 scalar_type alpha,
966 scalar_type beta) const
967{
968 using Teuchos::RCP;
969 typedef Tpetra::BlockMultiVector<scalar_type,
971
972 TEUCHOS_TEST_FOR_EXCEPTION(
973 this->A_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
974 "null. Please call setMatrix() with a nonnull input, then initialize() "
975 "and compute(), before calling this method.");
976 TEUCHOS_TEST_FOR_EXCEPTION(
977 ! this->isComputed (), std::runtime_error,
978 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
979 "you must call compute() before calling this method.");
980 TEUCHOS_TEST_FOR_EXCEPTION(
981 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
982 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
983 "X.getNumVectors() = " << X.getNumVectors ()
984 << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
985 TEUCHOS_TEST_FOR_EXCEPTION(
986 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
987 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
988 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
989 "fixed. There is a FIXME in this file about this very issue.");
990
991 const LO blockMatSize = blockSize_*blockSize_;
992
993 const LO rowStride = blockSize_;
994
995 BMV yBlock (Y, * (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_);
996 const BMV xBlock (X, * (this->A_->getColMap ()), blockSize_);
997
998 Teuchos::Array<scalar_type> lclarray(blockSize_);
999 little_host_vec_type lclvec((typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
1000 const scalar_type one = STM::one ();
1001 const scalar_type zero = STM::zero ();
1002
1003 Teuchos::Time timer ("RBILUK::apply");
1004 double startTime = timer.wallTime();
1005 { // Start timing
1006 Teuchos::TimeMonitor timeMon (timer);
1007 if (!this->isKokkosKernelsSpiluk_) {
1008 if (alpha == one && beta == zero) {
1009 if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1010 // Start by solving L C = X for C. C must have the same Map
1011 // as D. We have to use a temp multivector, since our
1012 // implementation of triangular solves does not allow its
1013 // input and output to alias one another.
1014 //
1015 // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
1016 const LO numVectors = xBlock.getNumVectors();
1017 BMV cBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1018 BMV rBlock (* (this->Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1019 for (LO imv = 0; imv < numVectors; ++imv)
1020 {
1021 for (size_t i = 0; i < D_block_->getLocalNumRows(); ++i)
1022 {
1023 LO local_row = i;
1024 const_host_little_vec_type xval =
1025 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1026 little_host_vec_type cval =
1027 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1028 //cval.assign(xval);
1029 Tpetra::COPY (xval, cval);
1030
1031 local_inds_host_view_type colValsL;
1032 values_host_view_type valsL;
1033 L_block_->getLocalRowView(local_row, colValsL, valsL);
1034 LO NumL = (LO) colValsL.size();
1035
1036 for (LO j = 0; j < NumL; ++j)
1037 {
1038 LO col = colValsL[j];
1039 const_host_little_vec_type prevVal =
1040 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1041
1042 const LO matOffset = blockMatSize*j;
1043 little_block_host_type lij((typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
1044
1045 //cval.matvecUpdate(-one, lij, prevVal);
1046 Tpetra::GEMV (-one, lij, prevVal, cval);
1047 }
1048 }
1049 }
1050
1051 // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
1052 D_block_->applyBlock(cBlock, rBlock);
1053
1054 // Solve U Y = R.
1055 for (LO imv = 0; imv < numVectors; ++imv)
1056 {
1057 const LO numRows = D_block_->getLocalNumRows();
1058 for (LO i = 0; i < numRows; ++i)
1059 {
1060 LO local_row = (numRows-1)-i;
1061 const_host_little_vec_type rval =
1062 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1063 little_host_vec_type yval =
1064 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1065 //yval.assign(rval);
1066 Tpetra::COPY (rval, yval);
1067
1068 local_inds_host_view_type colValsU;
1069 values_host_view_type valsU;
1070 U_block_->getLocalRowView(local_row, colValsU, valsU);
1071 LO NumU = (LO) colValsU.size();
1072
1073 for (LO j = 0; j < NumU; ++j)
1074 {
1075 LO col = colValsU[NumU-1-j];
1076 const_host_little_vec_type prevVal =
1077 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1078
1079 const LO matOffset = blockMatSize*(NumU-1-j);
1080 little_block_host_type uij((typename little_block_host_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
1081
1082 //yval.matvecUpdate(-one, uij, prevVal);
1083 Tpetra::GEMV (-one, uij, prevVal, yval);
1084 }
1085 }
1086 }
1087 }
1088 else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
1089 TEUCHOS_TEST_FOR_EXCEPTION(
1090 true, std::runtime_error,
1091 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1092 }
1093 }
1094 else { // alpha != 1 or beta != 0
1095 if (alpha == zero) {
1096 if (beta == zero) {
1097 Y.putScalar (zero);
1098 } else {
1099 Y.scale (beta);
1100 }
1101 } else { // alpha != zero
1102 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1103 apply (X, Y_tmp, mode);
1104 Y.update (alpha, Y_tmp, beta);
1105 }
1106 }
1107 }
1108 else {
1109 // Kokkos kernels impl.
1110 auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
1111 auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
1112
1113 auto lclL = L_block_->getLocalMatrixDevice();
1114 auto L_rowmap = lclL.graph.row_map;
1115 auto L_entries = lclL.graph.entries;
1116 auto L_values = lclL.values;
1117
1118 auto lclU = U_block_->getLocalMatrixDevice();
1119 auto U_rowmap = lclU.graph.row_map;
1120 auto U_entries = lclU.graph.entries;
1121 auto U_values = lclU.values;
1122
1123 if (mode == Teuchos::NO_TRANS) {
1124 {
1125 const LO numVecs = X.getNumVectors();
1126 for (LO vec = 0; vec < numVecs; ++vec) {
1127 auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
1128 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1129 KokkosSparse::Experimental::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
1130 }
1131 }
1132
1133 {
1134 const LO numVecs = X.getNumVectors();
1135 for (LO vec = 0; vec < numVecs; ++vec) {
1136 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1137 KokkosSparse::Experimental::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
1138 }
1139 }
1140
1141 KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
1142 }
1143 else {
1144 TEUCHOS_TEST_FOR_EXCEPTION(
1145 true, std::runtime_error,
1146 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1147 }
1148
1149 //Y.getWrappedDualView().sync();
1150 }
1151 } // Stop timing
1152
1153 this->numApply_ += 1;
1154 this->applyTime_ += (timer.wallTime() - startTime);
1155}
1156
1157
1158template<class MatrixType>
1160{
1161 std::ostringstream os;
1162
1163 // Output is a valid YAML dictionary in flow style. If you don't
1164 // like everything on a single line, you should call describe()
1165 // instead.
1166 os << "\"Ifpack2::Experimental::RBILUK\": {";
1167 os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
1168 << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
1169
1170 os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
1171
1172 if (this->A_.is_null ()) {
1173 os << "Matrix: null";
1174 }
1175 else {
1176 os << "Global matrix dimensions: ["
1177 << this->A_->getGlobalNumRows () << ", " << this->A_->getGlobalNumCols () << "]"
1178 << ", Global nnz: " << this->A_->getGlobalNumEntries();
1179 }
1180
1181 os << "}";
1182 return os.str ();
1183}
1184
1185} // namespace Experimental
1186
1187} // namespace Ifpack2
1188
1189// FIXME (mfh 26 Aug 2015) We only need to do instantiation for
1190// MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
1191// handled internally via dynamic cast.
1192
1193#define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
1194 template class Ifpack2::Experimental::RBILUK< Tpetra::BlockCrsMatrix<S, LO, GO, N> >; \
1195 template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
1196
1197#endif
File for utility functions.
Teuchos::RCP< Tpetra::CrsGraph< typename graph_type::local_ordinal_type, typename graph_type::global_ordinal_type, typename graph_type::node_type > > computeDiagonalGraph(graph_type const &graph)
Compute and return the graph of the diagonal of the input graph.
Definition Ifpack2_Utilities.hpp:35
ILU(k) factorization of a given Tpetra::BlockCrsMatrix.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:97
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:117
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:160
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:120
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:109
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition Ifpack2_Experimental_RBILUK_def.hpp:108
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition Ifpack2_Experimental_RBILUK_def.hpp:962
void compute()
Compute the (numeric) incomplete factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:543
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:126
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:103
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:113
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition Ifpack2_Experimental_RBILUK_def.hpp:103
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:146
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:286
std::string description() const
A one-line description of this object.
Definition Ifpack2_Experimental_RBILUK_def.hpp:1159
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition Ifpack2_Experimental_RBILUK_def.hpp:132
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition Ifpack2_Experimental_RBILUK_decl.hpp:132
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition Ifpack2_IlukGraph.hpp:67
static Teuchos::RCP< const row_matrix_type > makeLocalFilter(const Teuchos::RCP< const row_matrix_type > &A)
Definition Ifpack2_RILUK_def.hpp:448
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:41