550 A_local_crs_ = Details::getCrsMatrix(
A_local_);
551 if(A_local_crs_.is_null()) {
553 Array<size_t> entriesPerRow(numRows);
555 entriesPerRow[i] =
A_local_->getNumEntriesInLocalRow(i);
562 nonconst_local_inds_host_view_type indices(
"indices",
A_local_->getLocalMaxNumRowEntries());
563 nonconst_values_host_view_type values(
"values",
A_local_->getLocalMaxNumRowEntries());
565 size_t numEntries = 0;
566 A_local_->getLocalRowCopy(i, indices, values, numEntries);
567 A_local_crs_nc_->insertLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()), indices.data());
569 A_local_crs_nc_->fillComplete (
A_local_->getDomainMap (),
A_local_->getRangeMap ());
570 A_local_crs_ = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc_);
572 if (!isKokkosKernelsStream_) {
574 LevelOfFill_, 0, Overalloc_));
577 std::vector<int> weights(num_streams_);
578 std::fill(weights.begin(), weights.end(), 1);
579 exec_space_instances_ = Kokkos::Experimental::partition_space(
execution_space(), weights);
581 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
582 if (!hasStreamReordered_) {
583 KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks);
585 perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks,
true);
586 reverse_perm_v_.resize(perm_v_.size());
587 for(
size_t istream=0; istream < perm_v_.size(); ++istream) {
588 using perm_type =
typename lno_nonzero_view_t::non_const_type;
589 const auto perm = perm_v_[istream];
590 const auto perm_length = perm.extent(0);
591 perm_type reverse_perm(
592 Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverse_perm"),
594 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(exec_space_instances_[istream], 0, perm_length),
596 reverse_perm(perm(ii)) = ii;
598 reverse_perm_v_[istream] = reverse_perm;
602 A_local_diagblks_rowmap_v_ = std::vector<lno_row_view_t>(num_streams_);
603 A_local_diagblks_entries_v_ = std::vector<lno_nonzero_view_t>(num_streams_);
604 A_local_diagblks_values_v_ = std::vector<scalar_nonzero_view_t>(num_streams_);
606 for(
int i = 0; i < num_streams_; i++) {
607 A_local_diagblks_rowmap_v_[i] = A_local_diagblks[i].graph.row_map;
608 A_local_diagblks_entries_v_[i] = A_local_diagblks[i].graph.entries;
609 A_local_diagblks_values_v_[i] = A_local_diagblks[i].values;
611 Teuchos::RCP<const crs_map_type> A_local_diagblks_RowMap = rcp (
new crs_map_type(A_local_diagblks[i].numRows(),
612 A_local_diagblks[i].numRows(),
613 A_local_crs_->getRowMap()->getComm()));
614 Teuchos::RCP<const crs_map_type> A_local_diagblks_ColMap = rcp (
new crs_map_type(A_local_diagblks[i].numCols(),
615 A_local_diagblks[i].numCols(),
616 A_local_crs_->getColMap()->getComm()));
617 Teuchos::RCP<crs_matrix_type> A_local_diagblks_ = rcp (
new crs_matrix_type(A_local_diagblks_RowMap,
618 A_local_diagblks_ColMap,
619 A_local_diagblks[i]));
621 LevelOfFill_, 0, Overalloc_));
626 if (this->isKokkosKernelsSpiluk_) {
627 if (!isKokkosKernelsStream_) {
628 this->KernelHandle_ = Teuchos::rcp (
new kk_handle_type ());
629 KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
630 A_local_->getLocalNumRows(),
631 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
632 2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
633 Graph_->initialize (KernelHandle_);
636 KernelHandle_v_ = std::vector< Teuchos::RCP<kk_handle_type> >(num_streams_);
637 for (
int i = 0; i < num_streams_; i++) {
638 KernelHandle_v_[i] = Teuchos::rcp (
new kk_handle_type ());
639 KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
640 A_local_diagblks[i].numRows(),
641 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
642 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1) );
643 Graph_v_[i]->initialize (KernelHandle_v_[i]);
648 Graph_->initialize ();
652 checkOrderingConsistency (*A_local_);
653 if (!isKokkosKernelsStream_) {
654 L_solver_->setMatrix (L_);
657 L_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
658 L_solver_->setMatrices (L_v_);
660 L_solver_->initialize ();
662 if (!isKokkosKernelsStream_) {
663 U_solver_->setMatrix (U_);
666 U_solver_->setStreamInfo (isKokkosKernelsStream_, num_streams_, exec_space_instances_);
667 U_solver_->setMatrices (U_v_);
669 U_solver_->initialize ();
676 isInitialized_ =
true;
678 initializeTime_ += (timer.wallTime() - startTime);
681template<
class MatrixType>
689 Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getLocalElementList();
690 Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getLocalElementList();
691 bool gidsAreConsistentlyOrdered=
true;
694 if (rowGIDs[i] != colGIDs[i]) {
695 gidsAreConsistentlyOrdered=
false;
696 indexOfInconsistentGID=i;
700 TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==
false, std::runtime_error,
701 "The ordering of the local GIDs in the row and column maps is not the same"
702 << std::endl <<
"at index " << indexOfInconsistentGID
703 <<
". Consistency is required, as all calculations are done with"
704 << std::endl <<
"local indexing.");
707template<
class MatrixType>
712 using Teuchos::ArrayRCP;
716 using Teuchos::REDUCE_SUM;
717 using Teuchos::reduceAll;
718 typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type>
map_type;
720 size_t NumIn = 0, NumL = 0, NumU = 0;
721 bool DiagFound =
false;
722 size_t NumNonzeroDiags = 0;
723 size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
727 nonconst_local_inds_host_view_type InI(
"InI",MaxNumEntries);
728 Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
729 Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
730 nonconst_values_host_view_type InV(
"InV",MaxNumEntries);
731 Teuchos::Array<scalar_type> LV(MaxNumEntries);
732 Teuchos::Array<scalar_type> UV(MaxNumEntries);
735 const bool ReplaceValues = L_->isStaticGraph () || L_->isLocallyIndexed ();
740 L_->setAllToScalar (STS::zero ());
741 U_->setAllToScalar (STS::zero ());
744 D_->putScalar (STS::zero ());
745 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
747 RCP<const map_type> rowMap = L_->getRowMap ();
757 Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getLocalElementList();
758 for (
size_t myRow=0; myRow<A.getLocalNumRows(); ++myRow) {
763 A.getLocalRowCopy (local_row, InI, InV, NumIn);
771 for (
size_t j = 0; j < NumIn; ++j) {
774 if (k == local_row) {
777 DV(local_row) += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
780 TEUCHOS_TEST_FOR_EXCEPTION(
781 true, std::runtime_error,
"Ifpack2::RILUK::initAllValues: current "
782 "GID k = " << k <<
" < 0. I'm not sure why this is an error; it is "
783 "probably an artifact of the undocumented assumptions of the "
784 "original implementation (likely copied and pasted from Ifpack). "
785 "Nevertheless, the code I found here insisted on this being an error "
786 "state, so I will throw an exception here.");
788 else if (k < local_row) {
793 else if (Teuchos::as<size_t>(k) <= rowMap->getLocalNumElements()) {
805 DV(local_row) = Athresh_;
810 L_->replaceLocalValues(local_row, LI(0, NumL), LV(0,NumL));
814 L_->insertLocalValues(local_row, LI(0,NumL), LV(0,NumL));
820 U_->replaceLocalValues(local_row, UI(0,NumU), UV(0,NumU));
824 U_->insertLocalValues(local_row, UI(0,NumU), UV(0,NumU));
832 isInitialized_ =
true;
835template<
class MatrixType>
836void RILUK<MatrixType>::compute_serial ()
840 initAllValues (*A_local_);
845 const scalar_type MinDiagonalValue = STS::rmin ();
846 const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
848 size_t NumIn, NumL, NumU;
851 const size_t MaxNumEntries =
852 L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1;
854 Teuchos::Array<local_ordinal_type> InI(MaxNumEntries);
855 Teuchos::Array<scalar_type> InV(MaxNumEntries);
856 size_t num_cols = U_->getColMap()->getLocalNumElements();
857 Teuchos::Array<int> colflag(num_cols, -1);
859 auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
863 using IST =
typename row_matrix_type::impl_scalar_type;
864 for (
size_t i = 0; i < L_->getLocalNumRows (); ++i) {
868 local_inds_host_view_type UUI;
869 values_host_view_type UUV;
873 NumIn = MaxNumEntries;
874 nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries);
875 nonconst_values_host_view_type InV_v(
reinterpret_cast<IST*
>(InV.data()),MaxNumEntries);
877 L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL);
880 InI[NumL] = local_row;
882 nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
883 nonconst_values_host_view_type InV_sub(
reinterpret_cast<IST*
>(InV.data())+NumL+1,MaxNumEntries-NumL-1);
885 U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
889 for (
size_t j = 0; j < NumIn; ++j) {
893 scalar_type diagmod = STS::zero ();
895 for (
size_t jj = 0; jj < NumL; ++jj) {
897 IST multiplier = InV[jj];
899 InV[jj] *=
static_cast<scalar_type
>(DV(j));
901 U_->getLocalRowView(j, UUI, UUV);
904 if (RelaxValue_ == STM::zero ()) {
905 for (
size_t k = 0; k < NumUU; ++k) {
906 const int kk = colflag[UUI[k]];
911 InV[kk] -=
static_cast<scalar_type
>(multiplier * UUV[k]);
917 for (
size_t k = 0; k < NumUU; ++k) {
921 const int kk = colflag[UUI[k]];
923 InV[kk] -=
static_cast<scalar_type
>(multiplier*UUV[k]);
926 diagmod -=
static_cast<scalar_type
>(multiplier*UUV[k]);
934 L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
939 if (RelaxValue_ != STM::zero ()) {
940 DV(i) += RelaxValue_*diagmod;
943 if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) {
944 if (STS::real (DV(i)) < STM::zero ()) {
945 DV(i) = -MinDiagonalValue;
948 DV(i) = MinDiagonalValue;
952 DV(i) =
static_cast<impl_scalar_type
>(STS::one ()) / DV(i);
955 for (
size_t j = 0; j < NumU; ++j) {
956 InV[NumL+1+j] *=
static_cast<scalar_type
>(DV(i));
961 U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
965 for (
size_t j = 0; j < NumIn; ++j) {
966 colflag[InI[j]] = -1;
977 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
978 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
981 L_solver_->setMatrix (L_);
982 L_solver_->compute ();
983 U_solver_->setMatrix (U_);
984 U_solver_->compute ();
988template<
class MatrixType>
989void RILUK<MatrixType>::compute_kkspiluk()
994 L_->setAllToScalar (STS::zero ());
995 U_->setAllToScalar (STS::zero ());
997 using row_map_type =
typename crs_matrix_type::local_matrix_device_type::row_map_type;
998 auto lclL = L_->getLocalMatrixDevice();
999 row_map_type L_rowmap = lclL.graph.row_map;
1000 auto L_entries = lclL.graph.entries;
1001 auto L_values = lclL.values;
1003 auto lclU = U_->getLocalMatrixDevice();
1004 row_map_type U_rowmap = lclU.graph.row_map;
1005 auto U_entries = lclU.graph.entries;
1006 auto U_values = lclU.values;
1008 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1009 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
1010 lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values,
1011 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
1013 L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
1014 U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
1016 L_solver_->compute ();
1017 U_solver_->compute ();
1020template<
class MatrixType>
1021void RILUK<MatrixType>::compute_kkspiluk_stream()
1023 for(
int i = 0; i < num_streams_; i++) {
1024 L_v_[i]->resumeFill ();
1025 U_v_[i]->resumeFill ();
1027 L_v_[i]->setAllToScalar (STS::zero ());
1028 U_v_[i]->setAllToScalar (STS::zero ());
1030 std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
1031 std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
1032 std::vector<scalar_nonzero_view_t> L_values_v(num_streams_);
1033 std::vector<lno_row_view_t> U_rowmap_v(num_streams_);
1034 std::vector<lno_nonzero_view_t> U_entries_v(num_streams_);
1035 std::vector<scalar_nonzero_view_t> U_values_v(num_streams_);
1036 std::vector<kk_handle_type *> KernelHandle_rawptr_v_(num_streams_);
1037 for(
int i = 0; i < num_streams_; i++) {
1038 auto lclL = L_v_[i]->getLocalMatrixDevice();
1039 L_rowmap_v[i] = lclL.graph.row_map;
1040 L_entries_v[i] = lclL.graph.entries;
1041 L_values_v[i] = lclL.values;
1043 auto lclU = U_v_[i]->getLocalMatrixDevice();
1044 U_rowmap_v[i] = lclU.graph.row_map;
1045 U_entries_v[i] = lclU.graph.entries;
1046 U_values_v[i] = lclU.values;
1047 KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr();
1051 auto lclMtx = A_local_crs_->getLocalMatrixDevice();
1054 using TeamPolicy = Kokkos::TeamPolicy<execution_space>;
1055 const auto A_nrows = lclMtx.numRows();
1056 auto rows_per_block = ((A_nrows % num_streams_) == 0)
1057 ? (A_nrows / num_streams_)
1058 : (A_nrows / num_streams_ + 1);
1059 for(
int i = 0; i < num_streams_; i++) {
1060 const auto start_row_offset = i * rows_per_block;
1061 auto rowptrs = A_local_diagblks_rowmap_v_[i];
1062 auto colindices = A_local_diagblks_entries_v_[i];
1063 auto values = A_local_diagblks_values_v_[i];
1064 const bool reordered = hasStreamReordered_;
1065 typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] :
typename lno_nonzero_view_t::non_const_type{};
1066 TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO);
1067 Kokkos::parallel_for(pol, KOKKOS_LAMBDA (
const typename TeamPolicy::member_type &team) {
1068 const auto irow = team.league_rank();
1069 const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow);
1070 const auto A_local_crs_row = lclMtx.rowConst(irow_A);
1071 const auto begin_row = rowptrs(irow);
1072 const auto num_entries = rowptrs(irow + 1) - begin_row;
1073 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](
const int j) {
1074 const auto colidx = colindices(begin_row + j);
1075 const auto colidx_A = start_row_offset + (reordered ? reverse_perm(colidx) : colidx);
1077 const int offset = KokkosSparse::findRelOffset(
1078 &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0,
false);
1079 values(begin_row + j) = A_local_crs_row.value(offset);
1085 KokkosSparse::Experimental::spiluk_numeric_streams( exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_,
1086 A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_,
1087 L_rowmap_v, L_entries_v, L_values_v,
1088 U_rowmap_v, U_entries_v, U_values_v );
1089 for(
int i = 0; i < num_streams_; i++) {
1090 L_v_[i]->fillComplete ();
1091 U_v_[i]->fillComplete ();
1094 L_solver_->compute ();
1095 U_solver_->compute ();
1098template<
class MatrixType>
1103 using Teuchos::rcp_const_cast;
1104 using Teuchos::rcp_dynamic_cast;
1105 using Teuchos::Array;
1106 using Teuchos::ArrayView;
1107 const char prefix[] =
"Ifpack2::RILUK::compute: ";
1112 TEUCHOS_TEST_FOR_EXCEPTION
1113 (
A_.is_null (), std::runtime_error, prefix <<
"The matrix is null. Please "
1114 "call setMatrix() with a nonnull input before calling this method.");
1115 TEUCHOS_TEST_FOR_EXCEPTION
1116 (!
A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix is not "
1117 "fill complete. You may not invoke initialize() or compute() with this "
1118 "matrix until the matrix is fill complete. If your matrix is a "
1119 "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
1120 "range Maps, if appropriate) before calling this method.");
1126 Teuchos::Time timer (
"RILUK::compute");
1129 Teuchos::TimeMonitor timeMon (timer);
1130 double startTime = timer.wallTime();
1132 isComputed_ =
false;
1139 if(!A_local_crs_nc_.is_null()) {
1140 A_local_crs_nc_->resumeFill();
1142 Array<size_t> entriesPerRow(numRows);
1144 entriesPerRow[i] =
A_local_->getNumEntriesInLocalRow(i);
1147 nonconst_local_inds_host_view_type indices(
"indices",
A_local_->getLocalMaxNumRowEntries());
1148 nonconst_values_host_view_type values(
"values",
A_local_->getLocalMaxNumRowEntries());
1150 size_t numEntries = 0;
1151 A_local_->getLocalRowCopy(i, indices, values, numEntries);
1152 A_local_crs_nc_->replaceLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()),indices.data());
1154 A_local_crs_nc_->fillComplete (
A_local_->getDomainMap (),
A_local_->getRangeMap ());
1157 if (!isKokkosKernelsStream_) {
1161 compute_kkspiluk_stream();
1167 computeTime_ += (timer.wallTime() - startTime);
1171template <
typename MV,
typename Map>
1172void resetMultiVecIfNeeded(std::unique_ptr<MV> &mv_ptr,
const Map &map,
const size_t numVectors,
bool initialize)
1174 if(!mv_ptr || mv_ptr->getNumVectors() != numVectors) {
1175 mv_ptr.reset(
new MV(map, numVectors, initialize));
1180template<
class MatrixType>
1183apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1184 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1185 Teuchos::ETransp mode,
1190 using Teuchos::rcpFromRef;
1192 TEUCHOS_TEST_FOR_EXCEPTION(
1193 A_.is_null (), std::runtime_error,
"Ifpack2::RILUK::apply: The matrix is "
1194 "null. Please call setMatrix() with a nonnull input, then initialize() "
1195 "and compute(), before calling this method.");
1196 TEUCHOS_TEST_FOR_EXCEPTION(
1198 "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1199 "you must call compute() before calling this method.");
1200 TEUCHOS_TEST_FOR_EXCEPTION(
1201 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1202 "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1203 "X.getNumVectors() = " << X.getNumVectors ()
1204 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
1205 TEUCHOS_TEST_FOR_EXCEPTION(
1206 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1207 "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1208 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1209 "fixed. There is a FIXME in this file about this very issue.");
1210#ifdef HAVE_IFPACK2_DEBUG
1212 if (!isKokkosKernelsStream_) {
1214 TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1216 Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
1219 for (
size_t j = 0; j < X.getNumVectors (); ++j) {
1220 if (STM::isnaninf (norms[j])) {
1225 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1232 Teuchos::Time timer (
"RILUK::apply");
1233 double startTime = timer.wallTime();
1235 Teuchos::TimeMonitor timeMon (timer);
1236 if (alpha == one && beta == zero) {
1238 Impl::resetMultiVecIfNeeded(reordered_x_, X.getMap(), X.getNumVectors(),
false);
1239 Impl::resetMultiVecIfNeeded(reordered_y_, Y.getMap(), Y.getNumVectors(),
false);
1241 for (
size_t j = 0; j < X.getNumVectors(); j++) {
1242 auto X_j = X.getVector(j);
1243 auto ReorderedX_j = reordered_x_->getVectorNonConst(j);
1244 auto X_lcl = X_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1245 auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1248 for(
int i = 0; i < num_streams_; i++) {
1249 auto perm_i = perm_v_[i];
1250 stream_end = stream_begin + perm_i.extent(0);
1251 auto X_lcl_sub = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1252 auto ReorderedX_lcl_sub = Kokkos::subview (ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1253 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA (
const int& ii ) {
1254 ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii);
1256 stream_begin = stream_end;
1260 if (mode == Teuchos::NO_TRANS) {
1262 L_solver_->apply (*reordered_x_, Y, mode);
1264 U_solver_->apply (Y, *reordered_y_, mode);
1268 U_solver_->apply (*reordered_x_, Y, mode);
1270 L_solver_->apply (Y, *reordered_y_, mode);
1273 for (
size_t j = 0; j < Y.getNumVectors(); j++) {
1274 auto Y_j = Y.getVectorNonConst(j);
1275 auto ReorderedY_j = reordered_y_->getVector(j);
1276 auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite);
1277 auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly);
1280 for(
int i = 0; i < num_streams_; i++) {
1281 auto perm_i = perm_v_[i];
1282 stream_end = stream_begin + perm_i.extent(0);
1283 auto Y_lcl_sub = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1284 auto ReorderedY_lcl_sub = Kokkos::subview (ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0);
1285 Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(exec_space_instances_[i], 0,
static_cast<int>(perm_i.extent(0))), KOKKOS_LAMBDA (
const int& ii ) {
1286 Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii));
1288 stream_begin = stream_end;
1294 if (mode == Teuchos::NO_TRANS) {
1295#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1299 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1307 Y_tmp_->elementWiseMultiply (one, *
D_, *Y_tmp_, zero);
1318 Y.elementWiseMultiply (one, *
D_, Y, zero);
1325#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
1329 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1340 Y_tmp_->elementWiseMultiply (one, *
D_, *Y_tmp_, zero);
1354 Y.elementWiseMultiply (one, *
D_, Y, zero);
1363 if (alpha == zero) {
1373 Impl::resetMultiVecIfNeeded(Y_tmp_, Y.getMap(), Y.getNumVectors(),
false);
1374 apply (X, *Y_tmp_, mode);
1375 Y.update (alpha, *Y_tmp_, beta);
1380#ifdef HAVE_IFPACK2_DEBUG
1382 Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
1385 for (
size_t j = 0; j < Y.getNumVectors (); ++j) {
1386 if (STM::isnaninf (norms[j])) {
1391 TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error,
"Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1396 applyTime_ += (timer.wallTime() - startTime);
1433template<
class MatrixType>
1436 std::ostringstream os;
1441 os <<
"\"Ifpack2::RILUK\": {";
1442 os <<
"Initialized: " << (
isInitialized () ?
"true" :
"false") <<
", "
1443 <<
"Computed: " << (
isComputed () ?
"true" :
"false") <<
", ";
1448 if(isKokkosKernelsStream_) os<<
"KK-Stream, ";
1450 if (
A_.is_null ()) {
1451 os <<
"Matrix: null";
1454 os <<
"Global matrix dimensions: ["
1455 <<
A_->getGlobalNumRows () <<
", " <<
A_->getGlobalNumCols () <<
"]"
1456 <<
", Global nnz: " <<
A_->getGlobalNumEntries();
1468#define IFPACK2_RILUK_INSTANT(S,LO,GO,N) \
1469 template class Ifpack2::RILUK< Tpetra::RowMatrix<S, LO, GO, N> >;