547 using Teuchos::Array;
549 typedef impl_scalar_type IST;
550 const char prefix[] =
"Ifpack2::Experimental::RBILUK::compute: ";
555 TEUCHOS_TEST_FOR_EXCEPTION
556 (this->
A_.is_null (), std::runtime_error, prefix <<
"The matrix (A_, "
557 "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
558 "input before calling this method.");
559 TEUCHOS_TEST_FOR_EXCEPTION
560 (! this->
A_->isFillComplete (), std::runtime_error, prefix <<
"The matrix "
561 "(A_, the BlockCrsMatrix) is not fill complete. You may not invoke "
562 "initialize() or compute() with this matrix until the matrix is fill "
563 "complete. Note: BlockCrsMatrix is fill complete if and only if its "
564 "underlying graph is fill complete.");
588 Teuchos::Time timer (
"RBILUK::compute");
589 double startTime = timer.wallTime();
591 Teuchos::TimeMonitor timeMon (timer);
592 this->isComputed_ =
false;
602 LO NumL, NumU, NumURead;
605 const size_t MaxNumEntries =
606 L_block_->getLocalMaxNumRowEntries () + U_block_->getLocalMaxNumRowEntries () + 1;
608 const LO blockMatSize = blockSize_*blockSize_;
613 const LO rowStride = blockSize_;
615 Teuchos::Array<int> ipiv_teuchos(blockSize_);
616 Kokkos::View<
int*, Kokkos::HostSpace,
617 Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
618 Teuchos::Array<IST> work_teuchos(blockSize_);
619 Kokkos::View<IST*, Kokkos::HostSpace,
620 Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
622 size_t num_cols = U_block_->getColMap()->getLocalNumElements();
623 Teuchos::Array<int> colflag(num_cols);
625 typename GetManagedView<little_block_host_type>::managed_non_const_type diagModBlock (
"diagModBlock", blockSize_, blockSize_);
626 typename GetManagedView<little_block_host_type>::managed_non_const_type matTmp (
"matTmp", blockSize_, blockSize_);
627 typename GetManagedView<little_block_host_type>::managed_non_const_type multiplier (
"multiplier", blockSize_, blockSize_);
635 for (
size_t j = 0; j < num_cols; ++j) {
638 Teuchos::Array<LO> InI(MaxNumEntries, 0);
639 Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
641 const LO numLocalRows = L_block_->getLocalNumRows ();
642 for (LO local_row = 0; local_row < numLocalRows; ++local_row) {
646 NumIn = MaxNumEntries;
647 local_inds_host_view_type colValsL;
648 values_host_view_type valsL;
650 L_block_->getLocalRowView(local_row, colValsL, valsL);
651 NumL = (LO) colValsL.size();
652 for (LO j = 0; j < NumL; ++j)
654 const LO matOffset = blockMatSize*j;
655 little_block_host_type lmat((
typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
656 little_block_host_type lmatV((
typename little_block_host_type::value_type*) &InV[matOffset],blockSize_,rowStride);
658 Tpetra::COPY (lmat, lmatV);
659 InI[j] = colValsL[j];
662 little_block_host_type dmat = D_block_->getLocalBlockHostNonConst(local_row, local_row);
663 little_block_host_type dmatV((
typename little_block_host_type::value_type*) &InV[NumL*blockMatSize], blockSize_, rowStride);
665 Tpetra::COPY (dmat, dmatV);
666 InI[NumL] = local_row;
668 local_inds_host_view_type colValsU;
669 values_host_view_type valsU;
670 U_block_->getLocalRowView(local_row, colValsU, valsU);
671 NumURead = (LO) colValsU.size();
673 for (LO j = 0; j < NumURead; ++j)
675 if (!(colValsU[j] < numLocalRows))
continue;
676 InI[NumL+1+j] = colValsU[j];
677 const LO matOffset = blockMatSize*(NumL+1+j);
678 little_block_host_type umat((
typename little_block_host_type::value_type*) &valsU[blockMatSize*j], blockSize_, rowStride);
679 little_block_host_type umatV((
typename little_block_host_type::value_type*) &InV[matOffset], blockSize_, rowStride);
681 Tpetra::COPY (umat, umatV);
687 for (
size_t j = 0; j < NumIn; ++j) {
691#ifndef IFPACK2_RBILUK_INITIAL
692 for (LO i = 0; i < blockSize_; ++i)
693 for (LO j = 0; j < blockSize_; ++j){
695 diagModBlock(i,j) = 0;
700 Kokkos::deep_copy (diagModBlock, diagmod);
703 for (LO jj = 0; jj < NumL; ++jj) {
705 little_block_host_type currentVal((
typename little_block_host_type::value_type*) &InV[jj*blockMatSize], blockSize_, rowStride);
707 Tpetra::COPY (currentVal, multiplier);
709 const little_block_host_type dmatInverse = D_block_->getLocalBlockHostNonConst(j,j);
711#ifndef IFPACK2_RBILUK_INITIAL_NOKK
712 KokkosBatched::SerialGemm
713 <KokkosBatched::Trans::NoTranspose,
714 KokkosBatched::Trans::NoTranspose,
715 KokkosBatched::Algo::Gemm::Blocked>::invoke
716 (STS::one (), currentVal, dmatInverse, STS::zero (), matTmp);
718 Tpetra::GEMM (
"N",
"N", STS::one (), currentVal, dmatInverse,
719 STS::zero (), matTmp);
723 Tpetra::COPY (matTmp, currentVal);
724 local_inds_host_view_type UUI;
725 values_host_view_type UUV;
727 U_block_->getLocalRowView(j, UUI, UUV);
728 NumUU = (LO) UUI.size();
730 if (this->RelaxValue_ == STM::zero ()) {
731 for (LO k = 0; k < NumUU; ++k) {
732 if (!(UUI[k] < numLocalRows))
continue;
733 const int kk = colflag[UUI[k]];
735 little_block_host_type kkval((
typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
736 little_block_host_type uumat((
typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
737#ifndef IFPACK2_RBILUK_INITIAL_NOKK
738 KokkosBatched::SerialGemm
739 <KokkosBatched::Trans::NoTranspose,
740 KokkosBatched::Trans::NoTranspose,
741 KokkosBatched::Algo::Gemm::Blocked>::invoke
742 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
744 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
752 for (LO k = 0; k < NumUU; ++k) {
753 if (!(UUI[k] < numLocalRows))
continue;
754 const int kk = colflag[UUI[k]];
755 little_block_host_type uumat((
typename little_block_host_type::value_type*) &UUV[k*blockMatSize], blockSize_, rowStride);
757 little_block_host_type kkval((
typename little_block_host_type::value_type*) &InV[kk*blockMatSize], blockSize_, rowStride);
758#ifndef IFPACK2_RBILUK_INITIAL_NOKK
759 KokkosBatched::SerialGemm
760 <KokkosBatched::Trans::NoTranspose,
761 KokkosBatched::Trans::NoTranspose,
762 KokkosBatched::Algo::Gemm::Blocked>::invoke
763 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), kkval);
765 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
771#ifndef IFPACK2_RBILUK_INITIAL_NOKK
772 KokkosBatched::SerialGemm
773 <KokkosBatched::Trans::NoTranspose,
774 KokkosBatched::Trans::NoTranspose,
775 KokkosBatched::Algo::Gemm::Blocked>::invoke
776 (
magnitude_type(-STM::one ()), multiplier, uumat, STM::one (), diagModBlock);
778 Tpetra::GEMM (
"N",
"N",
magnitude_type(-STM::one ()), multiplier, uumat,
779 STM::one (), diagModBlock);
788 L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
792 Tpetra::COPY (dmatV, dmat);
794 if (this->RelaxValue_ != STM::zero ()) {
796 Tpetra::AXPY (this->RelaxValue_, diagModBlock, dmat);
810 for (
int k = 0; k < blockSize_; ++k) {
814 Tpetra::GETF2 (dmat, ipiv, lapackInfo);
816 TEUCHOS_TEST_FOR_EXCEPTION(
817 lapackInfo != 0, std::runtime_error,
"Ifpack2::Experimental::RBILUK::compute: "
818 "lapackInfo = " << lapackInfo <<
" which indicates an error in the factorization GETRF.");
820 Tpetra::GETRI (dmat, ipiv, work, lapackInfo);
822 TEUCHOS_TEST_FOR_EXCEPTION(
823 lapackInfo != 0, std::runtime_error,
"Ifpack2::Experimental::RBILUK::compute: "
824 "lapackInfo = " << lapackInfo <<
" which indicates an error in the matrix inverse GETRI.");
827 for (LO j = 0; j < NumU; ++j) {
828 little_block_host_type currentVal((
typename little_block_host_type::value_type*) &InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride);
830#ifndef IFPACK2_RBILUK_INITIAL_NOKK
831 KokkosBatched::SerialGemm
832 <KokkosBatched::Trans::NoTranspose,
833 KokkosBatched::Trans::NoTranspose,
834 KokkosBatched::Algo::Gemm::Blocked>::invoke
835 (STS::one (), dmat, currentVal, STS::zero (), matTmp);
837 Tpetra::GEMM (
"N",
"N", STS::one (), dmat, currentVal,
838 STS::zero (), matTmp);
842 Tpetra::COPY (matTmp, currentVal);
847 U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
850#ifndef IFPACK2_RBILUK_INITIAL
852 for (
size_t j = 0; j < NumIn; ++j) {
853 colflag[InI[j]] = -1;
857 for (
size_t j = 0; j < num_cols; ++j) {
864 RCP<const block_crs_matrix_type> A_local_bcrs = Details::getBcrsMatrix(this->
A_local_);
865 RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->
A_local_);
866 if (A_local_bcrs.is_null()) {
867 if (A_local_crs.is_null()) {
869 Array<size_t> entriesPerRow(numRows);
871 entriesPerRow[i] = this->
A_local_->getNumEntriesInLocalRow(i);
873 RCP<crs_matrix_type> A_local_crs_nc =
875 this->A_local_->getColMap (),
878 nonconst_local_inds_host_view_type indices(
"indices",this->
A_local_->getLocalMaxNumRowEntries());
879 nonconst_values_host_view_type values(
"values",this->
A_local_->getLocalMaxNumRowEntries());
881 size_t numEntries = 0;
882 this->
A_local_->getLocalRowCopy(i, indices, values, numEntries);
883 A_local_crs_nc->insertLocalValues(i, numEntries,
reinterpret_cast<scalar_type*
>(values.data()),indices.data());
885 A_local_crs_nc->fillComplete (this->
A_local_->getDomainMap (), this->A_local_->getRangeMap ());
886 A_local_crs = Teuchos::rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
890 if (blockSize_ > 1) {
891 auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
892 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
895 A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*A_local_crs, blockSize_);
899 TEUCHOS_TEST_FOR_EXCEPTION(
900 this->isKokkosKernelsStream_, std::runtime_error,
"Ifpack2::RBILUK::compute: "
901 "streams are not yet supported.");
903 auto lclMtx = A_local_bcrs->getLocalMatrixDevice();
904 auto A_local_rowmap = lclMtx.graph.row_map;
905 auto A_local_entries = lclMtx.graph.entries;
906 auto A_local_values = lclMtx.values;
911 if (L_block_->isLocallyIndexed ()) {
912 L_block_->setAllToScalar (STS::zero ());
913 U_block_->setAllToScalar (STS::zero ());
916 using row_map_type =
typename local_matrix_device_type::row_map_type;
918 auto lclL = L_block_->getLocalMatrixDevice();
919 row_map_type L_rowmap = lclL.graph.row_map;
920 auto L_entries = lclL.graph.entries;
921 auto L_values = lclL.values;
923 auto lclU = U_block_->getLocalMatrixDevice();
924 row_map_type U_rowmap = lclU.graph.row_map;
925 auto U_entries = lclU.graph.entries;
926 auto U_values = lclU.values;
928 KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_,
929 A_local_rowmap, A_local_entries, A_local_values,
930 L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );
933 KokkosSparse::Experimental::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
934 KokkosSparse::Experimental::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
953 this->isComputed_ =
true;
954 this->numCompute_ += 1;
955 this->computeTime_ += (timer.wallTime() - startTime);
962apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
963 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
964 Teuchos::ETransp mode,
972 TEUCHOS_TEST_FOR_EXCEPTION(
973 this->
A_.is_null (), std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: The matrix is "
974 "null. Please call setMatrix() with a nonnull input, then initialize() "
975 "and compute(), before calling this method.");
976 TEUCHOS_TEST_FOR_EXCEPTION(
978 "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
979 "you must call compute() before calling this method.");
980 TEUCHOS_TEST_FOR_EXCEPTION(
981 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
982 "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
983 "X.getNumVectors() = " << X.getNumVectors ()
984 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
985 TEUCHOS_TEST_FOR_EXCEPTION(
986 STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
987 "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
988 "complex Scalar type. Please talk to the Ifpack2 developers to get this "
989 "fixed. There is a FIXME in this file about this very issue.");
991 const LO blockMatSize = blockSize_*blockSize_;
993 const LO rowStride = blockSize_;
995 BMV yBlock (Y, * (this->
Graph_->getA_Graph()->getDomainMap ()), blockSize_);
996 const BMV xBlock (X, * (this->
A_->getColMap ()), blockSize_);
998 Teuchos::Array<scalar_type> lclarray(blockSize_);
999 little_host_vec_type lclvec((
typename little_host_vec_type::value_type*)&lclarray[0], blockSize_);
1003 Teuchos::Time timer (
"RBILUK::apply");
1004 double startTime = timer.wallTime();
1006 Teuchos::TimeMonitor timeMon (timer);
1008 if (alpha == one && beta == zero) {
1009 if (mode == Teuchos::NO_TRANS) {
1016 const LO numVectors = xBlock.getNumVectors();
1017 BMV cBlock (* (this->
Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1018 BMV rBlock (* (this->
Graph_->getA_Graph()->getDomainMap ()), blockSize_, numVectors);
1019 for (LO imv = 0; imv < numVectors; ++imv)
1021 for (
size_t i = 0; i < D_block_->getLocalNumRows(); ++i)
1024 const_host_little_vec_type xval =
1025 xBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1026 little_host_vec_type cval =
1027 cBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1029 Tpetra::COPY (xval, cval);
1031 local_inds_host_view_type colValsL;
1032 values_host_view_type valsL;
1033 L_block_->getLocalRowView(local_row, colValsL, valsL);
1034 LO NumL = (LO) colValsL.size();
1036 for (LO j = 0; j < NumL; ++j)
1038 LO col = colValsL[j];
1039 const_host_little_vec_type prevVal =
1040 cBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1042 const LO matOffset = blockMatSize*j;
1043 little_block_host_type lij((
typename little_block_host_type::value_type*) &valsL[matOffset],blockSize_,rowStride);
1046 Tpetra::GEMV (-one, lij, prevVal, cval);
1052 D_block_->applyBlock(cBlock, rBlock);
1055 for (LO imv = 0; imv < numVectors; ++imv)
1057 const LO numRows = D_block_->getLocalNumRows();
1058 for (LO i = 0; i < numRows; ++i)
1060 LO local_row = (numRows-1)-i;
1061 const_host_little_vec_type rval =
1062 rBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::ReadOnly);
1063 little_host_vec_type yval =
1064 yBlock.getLocalBlockHost(local_row, imv, Tpetra::Access::OverwriteAll);
1066 Tpetra::COPY (rval, yval);
1068 local_inds_host_view_type colValsU;
1069 values_host_view_type valsU;
1070 U_block_->getLocalRowView(local_row, colValsU, valsU);
1071 LO NumU = (LO) colValsU.size();
1073 for (LO j = 0; j < NumU; ++j)
1075 LO col = colValsU[NumU-1-j];
1076 const_host_little_vec_type prevVal =
1077 yBlock.getLocalBlockHost(col, imv, Tpetra::Access::ReadOnly);
1079 const LO matOffset = blockMatSize*(NumU-1-j);
1080 little_block_host_type uij((
typename little_block_host_type::value_type*) &valsU[matOffset], blockSize_, rowStride);
1083 Tpetra::GEMV (-one, uij, prevVal, yval);
1089 TEUCHOS_TEST_FOR_EXCEPTION(
1090 true, std::runtime_error,
1091 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1095 if (alpha == zero) {
1102 MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1103 apply (X, Y_tmp, mode);
1104 Y.update (alpha, Y_tmp, beta);
1110 auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
1111 auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);
1113 auto lclL = L_block_->getLocalMatrixDevice();
1114 auto L_rowmap = lclL.graph.row_map;
1115 auto L_entries = lclL.graph.entries;
1116 auto L_values = lclL.values;
1118 auto lclU = U_block_->getLocalMatrixDevice();
1119 auto U_rowmap = lclU.graph.row_map;
1120 auto U_entries = lclU.graph.entries;
1121 auto U_values = lclU.values;
1123 if (mode == Teuchos::NO_TRANS) {
1125 const LO numVecs = X.getNumVectors();
1126 for (LO vec = 0; vec < numVecs; ++vec) {
1127 auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
1128 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1129 KokkosSparse::Experimental::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
1134 const LO numVecs = X.getNumVectors();
1135 for (LO vec = 0; vec < numVecs; ++vec) {
1136 auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
1137 KokkosSparse::Experimental::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
1141 KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
1144 TEUCHOS_TEST_FOR_EXCEPTION(
1145 true, std::runtime_error,
1146 "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
1153 this->numApply_ += 1;
1154 this->applyTime_ += (timer.wallTime() - startTime);