Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
// @HEADER
// *****************************************************************************
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
//
// Copyright 2009 NTESS and the Ifpack2 contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"
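// KokkosSparse_spmv_impl.hpp provides KokkosSparse::Impl::spmv_launch_parameters,
// used below to pick team/vector launch parameters the same way KokkosSparse's
// own SpMV does.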

namespace Ifpack2 {
namespace Details {
namespace Impl {

/// \brief Functor for computing W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar,
         bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert (static_cast<int> (WVector::rank) == 1,
                 "WVector must be a rank 1 View.");
  static_assert (static_cast<int> (DVector::rank) == 1,
                 "DVector must be a rank 1 View.");
  static_assert (static_cast<int> (BVector::rank) == 1,
                 "BVector must be a rank 1 View.");
  static_assert (static_cast<int> (XVector::rank) == 1,
                 "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor (const Scalar& alpha_,
                                     const WVector& m_w_,
                                     const DVector& m_d_,
                                     const BVector& m_b_,
                                     const AMatrix& m_A_,
                                     const XVector& m_x_,
                                     const Scalar& beta_,
                                     const int rows_per_team_) :
    alpha (alpha_),
    m_w (m_w_),
    m_d (m_d_),
    m_b (m_b_),
    m_A (m_A_),
    m_x (m_x_),
    beta (beta_),
    rows_per_team (rows_per_team_)
  {
    const size_t numRows = m_A.numRows ();
    const size_t numCols = m_A.numCols ();

    TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
    TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
    TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
    TEUCHOS_ASSERT( numCols <= size_t (m_x.extent (0)) );
  }

  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for
      (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
       [&] (const LO& loop) {
         const LO lclRow =
           static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
         if (lclRow >= m_A.numRows ()) {
           return;
         }
         const auto A_row = m_A.rowConst(lclRow);
         const LO row_length = static_cast<LO> (A_row.length);
         residual_value_type A_x = KAT::zero ();

         Kokkos::parallel_reduce
           (Kokkos::ThreadVectorRange (dev, row_length),
            [&] (const LO iEntry, residual_value_type& lsum) {
              const auto A_val = A_row.value(iEntry);
              lsum += A_val * m_x(A_row.colidx(iEntry));
            }, A_x);

         Kokkos::single
           (Kokkos::PerThread(dev),
            [&] () {
              const auto alpha_D_res =
                alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
              if (use_beta) {
                m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
              }
              else {
                m_w(lclRow) = alpha_D_res;
              }
            });
       });
  }

};
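// In exact arithmetic, the functor above performs the row-wise update
//
//   w(i) = beta * w(i) + alpha * d(i) * (b(i) - sum_j A(i,j) * x(j))
//
// (dropping the beta term when use_beta is false), with one team per
// block of rows_per_team rows and each row's sum reduced across vector lanes.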

// W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar>
static void
scaled_damped_residual_vector
(const Scalar& alpha,
 const WVector& w,
 const DVector& d,
 const BVector& b,
 const AMatrix& A,
 const XVector& x,
 const Scalar& beta)
{
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows () == 0) {
    return;
  }

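  // -1 means "not specified"; spmv_launch_parameters may fill these in.
  // If team_size stays negative, Kokkos::AUTO is used below.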
  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

  const int64_t rows_per_team =
    KokkosSparse::Impl::spmv_launch_parameters<execution_space> (A.numRows (), A.nnz (),
                                                                 rows_per_thread,
                                                                 team_size, vector_length);
  int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Static;
  using Kokkos::Schedule;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic (1, 1);
  policy_type_static policyStatic (1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
    policyStatic = policy_type_static (worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;

  if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
    constexpr bool use_beta = false;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
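    // Heuristic: use the dynamic schedule above 10M stored entries, the
    // static schedule otherwise (presumably trading scheduling overhead
    // for better load balance on large problems).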
    if (A.nnz () > 10000000)
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for (kernel_label, policyStatic, func);
  }
  else {
    constexpr bool use_beta = true;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000)
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    else
      Kokkos::parallel_for (kernel_label, policyStatic, func);
  }
}
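
// A minimal calling sketch, as fusedCase () below does it: given a
// fill-complete Tpetra::CrsMatrix A and rank-1 device subviews w_lcl,
// d_lcl, b_lcl, x_lcl of the corresponding (Multi)Vectors (names here
// hypothetical),
//
//   scaled_damped_residual_vector (alpha, w_lcl, d_lcl, b_lcl,
//                                  A.getLocalMatrixDevice (), x_lcl, beta);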

} // namespace Impl

template<class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
ScaledDampedResidual (const Teuchos::RCP<const operator_type>& A)
{
  setMatrix (A);
}

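// Cache the matrix; when it is a fill-complete Tpetra::CrsMatrix, also
// cache its graph's Import and Export. Setting the same matrix again is
// a no-op.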
template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
setMatrix (const Teuchos::RCP<const operator_type>& A)
{
  if (A_op_.get () != A.get ()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type> (nullptr);
    V1_ = std::unique_ptr<multivector_type> (nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
      rcp_dynamic_cast<const crs_matrix_type> (A);
    if (A_crs.is_null ()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    }
    else {
      TEUCHOS_ASSERT( A_crs->isFillComplete () );
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph ();
      imp_ = G->getImporter ();
      exp_ = G->getExporter ();
    }
  }
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
compute (multivector_type& W,
         const SC& alpha,
         vector_type& D_inv,
         multivector_type& B,
         multivector_type& X,
         const SC& beta)
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  if (canFuse (B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst (0);
    B_vec_ = B.getVectorNonConst (0);
    X_vec_ = X.getVectorNonConst (0);
    TEUCHOS_ASSERT( ! A_crs_.is_null () );
    fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  }
  else {
    TEUCHOS_ASSERT( ! A_op_.is_null () );
    unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
  }
}
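
// A minimal usage sketch, assuming a fill-complete crs_matrix_type A_crs
// (held as RCP<const crs_matrix_type>) and conforming vectors W, D_inv,
// B, X; op_type is the matching Tpetra::Operator specialization and all
// names are hypothetical:
//
//   ScaledDampedResidual<op_type> sdr (A_crs);
//   sdr.compute (W, alpha, D_inv, B, X, beta);
//   // afterwards W = alpha * D_inv * (B - A*X) + beta * W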

template<class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
importVector (vector_type& X_domMap)
{
  if (imp_.is_null ()) {
    return X_domMap;
  }
  else {
    if (X_colMap_.get () == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V> (new V (imp_->getTargetMap ()));
    }
    X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

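// Fusing requires a single-column B, a concrete Tpetra::CrsMatrix, and
// no Export; the fused path then works directly on local device views.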
template<class TpetraOperatorType>
bool
ScaledDampedResidual<TpetraOperatorType>::
canFuse (const multivector_type& B) const
{
  return B.getNumVectors () == size_t (1) &&
    ! A_crs_.is_null () &&
    exp_.is_null ();
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
unfusedCase (multivector_type& W,
             const SC& alpha,
             vector_type& D_inv,
             multivector_type& B,
             const operator_type& A,
             multivector_type& X,
             const SC& beta)
{
  if (V1_.get () == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors ();
    V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one ();

  // V1 = B - A*X
  Tpetra::deep_copy (*V1_, B);
  A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
fusedCase (vector_type& W,
           const SC& alpha,
           vector_type& D_inv,
           vector_type& B,
           const crs_matrix_type& A,
           vector_type& X,
           const SC& beta)
{
  vector_type& X_colMap = importVector (X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice ();
  auto Dinv_lcl = Kokkos::subview (D_inv.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto B_lcl = Kokkos::subview (B.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto X_lcl = Kokkos::subview (X_colMap.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  if (beta == STS::zero ()) {
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::OverwriteAll), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
  else { // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::ReadWrite), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC,LO,GO,NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;
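
// An invocation of this macro in an explicit-template-instantiation (ETI)
// .cpp file might look like the following sketch (types hypothetical):
//
//   IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(double, int, long long, Tpetra::Map<>::node_type)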

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP