Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_ChebyshevKernel_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
11#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
12
13#include "Tpetra_CrsMatrix.hpp"
14#include "Tpetra_MultiVector.hpp"
15#include "Tpetra_Operator.hpp"
16#include "Tpetra_Vector.hpp"
17#include "Tpetra_Export_decl.hpp"
18#include "Tpetra_Import_decl.hpp"
19#include "Kokkos_ArithTraits.hpp"
20#include "Teuchos_Assert.hpp"
21#include <type_traits>
22#include "KokkosSparse_spmv_impl.hpp"
23
24namespace Ifpack2 {
25namespace Details {
26namespace Impl {
27
32template<class WVector,
33 class DVector,
34 class BVector,
35 class AMatrix,
36 class XVector_colMap,
37 class XVector_domMap,
38 class Scalar,
39 bool use_beta,
40 bool do_X_update>
41struct ChebyshevKernelVectorFunctor {
42 static_assert (static_cast<int> (WVector::rank) == 1,
43 "WVector must be a rank 1 View.");
44 static_assert (static_cast<int> (DVector::rank) == 1,
45 "DVector must be a rank 1 View.");
46 static_assert (static_cast<int> (BVector::rank) == 1,
47 "BVector must be a rank 1 View.");
48 static_assert (static_cast<int> (XVector_colMap::rank) == 1,
49 "XVector_colMap must be a rank 1 View.");
50 static_assert (static_cast<int> (XVector_domMap::rank) == 1,
51 "XVector_domMap must be a rank 1 View.");
52
53 using execution_space = typename AMatrix::execution_space;
54 using LO = typename AMatrix::non_const_ordinal_type;
55 using value_type = typename AMatrix::non_const_value_type;
56 using team_policy = typename Kokkos::TeamPolicy<execution_space>;
57 using team_member = typename team_policy::member_type;
58 using ATV = Kokkos::ArithTraits<value_type>;
59
60 const Scalar alpha;
61 WVector m_w;
62 DVector m_d;
63 BVector m_b;
64 AMatrix m_A;
65 XVector_colMap m_x_colMap;
66 XVector_domMap m_x_domMap;
67 const Scalar beta;
68
69 const LO rows_per_team;
70
71 ChebyshevKernelVectorFunctor (const Scalar& alpha_,
72 const WVector& m_w_,
73 const DVector& m_d_,
74 const BVector& m_b_,
75 const AMatrix& m_A_,
76 const XVector_colMap& m_x_colMap_,
77 const XVector_domMap& m_x_domMap_,
78 const Scalar& beta_,
79 const int rows_per_team_) :
80 alpha (alpha_),
81 m_w (m_w_),
82 m_d (m_d_),
83 m_b (m_b_),
84 m_A (m_A_),
85 m_x_colMap (m_x_colMap_),
86 m_x_domMap (m_x_domMap_),
87 beta (beta_),
88 rows_per_team (rows_per_team_)
89 {
90 const size_t numRows = m_A.numRows ();
91 const size_t numCols = m_A.numCols ();
92
93 TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
94 TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
95 TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
96 TEUCHOS_ASSERT( numCols <= size_t (m_x_colMap.extent (0)) );
97 TEUCHOS_ASSERT( numRows <= size_t (m_x_domMap.extent (0)) );
98 }
99
100 KOKKOS_INLINE_FUNCTION
101 void operator() (const team_member& dev) const
102 {
103 using residual_value_type = typename BVector::non_const_value_type;
104 using KAT = Kokkos::ArithTraits<residual_value_type>;
105
106 Kokkos::parallel_for
107 (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
108 [&] (const LO& loop) {
109 const LO lclRow =
110 static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
111 if (lclRow >= m_A.numRows ()) {
112 return;
113 }
114 const KokkosSparse::SparseRowViewConst<AMatrix> A_row = m_A.rowConst(lclRow);
115 const LO row_length = static_cast<LO> (A_row.length);
116 residual_value_type A_x = KAT::zero ();
117
118 Kokkos::parallel_reduce
119 (Kokkos::ThreadVectorRange (dev, row_length),
120 [&] (const LO iEntry, residual_value_type& lsum) {
121 const auto A_val = A_row.value(iEntry);
122 lsum += A_val * m_x_colMap(A_row.colidx(iEntry));
123 }, A_x);
124
125 Kokkos::single
126 (Kokkos::PerThread(dev),
127 [&] () {
128 const auto alpha_D_res =
129 alpha * m_d(lclRow) * (m_b(lclRow) - A_x);
130 if (use_beta) {
131 m_w(lclRow) = beta * m_w(lclRow) + alpha_D_res;
132 }
133 else {
134 m_w(lclRow) = alpha_D_res;
135 }
136 if (do_X_update)
137 m_x_domMap(lclRow) += m_w(lclRow);
138 });
139 });
140 }
141
142};
143
144
145// W := alpha * D * (B - A*X) + beta * W.
146template<class WVector,
147 class DVector,
148 class BVector,
149 class AMatrix,
150 class XVector_colMap,
151 class XVector_domMap,
152 class Scalar>
153static void
154chebyshev_kernel_vector
155(const Scalar& alpha,
156 const WVector& w,
157 const DVector& d,
158 const BVector& b,
159 const AMatrix& A,
160 const XVector_colMap& x_colMap,
161 const XVector_domMap& x_domMap,
162 const Scalar& beta,
163 const bool do_X_update)
164{
165 using execution_space = typename AMatrix::execution_space;
166
167 if (A.numRows () == 0) {
168 return;
169 }
170
171 int team_size = -1;
172 int vector_length = -1;
173 int64_t rows_per_thread = -1;
174
175 const int64_t rows_per_team = KokkosSparse::Impl::spmv_launch_parameters<execution_space>(A.numRows(), A.nnz(), rows_per_thread, team_size, vector_length);
176 int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;
177
178 using Kokkos::Dynamic;
179 using Kokkos::Static;
180 using Kokkos::Schedule;
181 using Kokkos::TeamPolicy;
182 using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
183 using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
184 const char kernel_label[] = "chebyshev_kernel_vector";
185 policy_type_dynamic policyDynamic (1, 1);
186 policy_type_static policyStatic (1, 1);
187 if (team_size < 0) {
188 policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
189 policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
190 }
191 else {
192 policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
193 policyStatic = policy_type_static (worksets, team_size, vector_length);
194 }
195
196 // Canonicalize template arguments to avoid redundant instantiations.
197 using w_vec_type = typename WVector::non_const_type;
198 using d_vec_type = typename DVector::const_type;
199 using b_vec_type = typename BVector::const_type;
200 using matrix_type = AMatrix;
201 using x_colMap_vec_type = typename XVector_colMap::const_type;
202 using x_domMap_vec_type = typename XVector_domMap::non_const_type;
203 using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
204
205 if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
206 constexpr bool use_beta = false;
207 if (do_X_update) {
208 using functor_type =
209 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
210 b_vec_type, matrix_type,
211 x_colMap_vec_type, x_domMap_vec_type,
212 scalar_type,
213 use_beta,
214 true>;
215 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
216 if(A.nnz()>10000000)
217 Kokkos::parallel_for (kernel_label, policyDynamic, func);
218 else
219 Kokkos::parallel_for (kernel_label, policyStatic, func);
220 } else {
221 using functor_type =
222 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
223 b_vec_type, matrix_type,
224 x_colMap_vec_type, x_domMap_vec_type,
225 scalar_type,
226 use_beta,
227 false>;
228 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
229 if(A.nnz()>10000000)
230 Kokkos::parallel_for (kernel_label, policyDynamic, func);
231 else
232 Kokkos::parallel_for (kernel_label, policyStatic, func);
233 }
234 }
235 else {
236 constexpr bool use_beta = true;
237 if (do_X_update) {
238 using functor_type =
239 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
240 b_vec_type, matrix_type,
241 x_colMap_vec_type, x_domMap_vec_type,
242 scalar_type,
243 use_beta,
244 true>;
245 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
246 if(A.nnz()>10000000)
247 Kokkos::parallel_for (kernel_label, policyDynamic, func);
248 else
249 Kokkos::parallel_for (kernel_label, policyStatic, func);
250 } else {
251 using functor_type =
252 ChebyshevKernelVectorFunctor<w_vec_type, d_vec_type,
253 b_vec_type, matrix_type,
254 x_colMap_vec_type, x_domMap_vec_type,
255 scalar_type,
256 use_beta,
257 false>;
258 functor_type func (alpha, w, d, b, A, x_colMap, x_domMap, beta, rows_per_team);
259 if(A.nnz()>10000000)
260 Kokkos::parallel_for (kernel_label, policyDynamic, func);
261 else
262 Kokkos::parallel_for (kernel_label, policyStatic, func);
263 }
264 }
265}
266
267} // namespace Impl
268
269template<class TpetraOperatorType>
271ChebyshevKernel (const Teuchos::RCP<const operator_type>& A,
272 const bool useNativeSpMV):
273 useNativeSpMV_(useNativeSpMV)
274{
275 setMatrix (A);
276}
277
278template<class TpetraOperatorType>
279void
280ChebyshevKernel<TpetraOperatorType>::
281setMatrix (const Teuchos::RCP<const operator_type>& A)
282{
283 if (A_op_.get () != A.get ()) {
284 A_op_ = A;
285
286 // We'll (re)allocate these on demand.
287 V1_ = std::unique_ptr<multivector_type> (nullptr);
288
289 using Teuchos::rcp_dynamic_cast;
290 Teuchos::RCP<const crs_matrix_type> A_crs =
291 rcp_dynamic_cast<const crs_matrix_type> (A);
292 if (A_crs.is_null ()) {
293 A_crs_ = Teuchos::null;
294 imp_ = Teuchos::null;
295 exp_ = Teuchos::null;
296 X_colMap_ = nullptr;
297 }
298 else {
299 TEUCHOS_ASSERT( A_crs->isFillComplete () );
300 A_crs_ = A_crs;
301 auto G = A_crs->getCrsGraph ();
302 imp_ = G->getImporter ();
303 exp_ = G->getExporter ();
304 if (!imp_.is_null ()) {
305 if (X_colMap_.get () == nullptr ||
306 !X_colMap_->getMap()->isSameAs (*(imp_->getTargetMap ()))) {
307 X_colMap_ = std::unique_ptr<vector_type> (new vector_type (imp_->getTargetMap ()));
308 }
309 } else
310 X_colMap_ = nullptr;
311 }
312 }
313}
314
315template<class TpetraOperatorType>
316void
317ChebyshevKernel<TpetraOperatorType>::
318compute (multivector_type& W,
319 const SC& alpha,
320 vector_type& D_inv,
321 multivector_type& B,
322 multivector_type& X,
323 const SC& beta)
324{
325 using Teuchos::RCP;
326 using Teuchos::rcp;
327
328 if (canFuse (B)) {
329 // "nonconst" here has no effect other than on the return type.
330 W_vec_ = W.getVectorNonConst (0);
331 B_vec_ = B.getVectorNonConst (0);
332 X_vec_ = X.getVectorNonConst (0);
333 TEUCHOS_ASSERT( ! A_crs_.is_null () );
334 fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
335 }
336 else {
337 TEUCHOS_ASSERT( ! A_op_.is_null () );
338 unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
339 }
340}
341
342template<class TpetraOperatorType>
343typename ChebyshevKernel<TpetraOperatorType>::vector_type&
344ChebyshevKernel<TpetraOperatorType>::
345importVector (vector_type& X_domMap)
346{
347 if (imp_.is_null ()) {
348 return X_domMap;
349 }
350 else {
351 X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
352 return *X_colMap_;
353 }
354}
355
356template<class TpetraOperatorType>
357bool
358ChebyshevKernel<TpetraOperatorType>::
359canFuse (const multivector_type& B) const
360{
361 // If override is enabled
362 if(useNativeSpMV_)
363 return false;
364
365 // Some criteria must be met for fused kernel
366 return B.getNumVectors () == size_t (1) &&
367 ! A_crs_.is_null () &&
368 exp_.is_null ();
369}
370
371template<class TpetraOperatorType>
372void
373ChebyshevKernel<TpetraOperatorType>::
374unfusedCase (multivector_type& W,
375 const SC& alpha,
376 vector_type& D_inv,
377 multivector_type& B,
378 const operator_type& A,
379 multivector_type& X,
380 const SC& beta)
381{
382 using STS = Teuchos::ScalarTraits<SC>;
383 if (V1_.get () == nullptr) {
384 using MV = multivector_type;
385 const size_t numVecs = B.getNumVectors ();
386 V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
387 }
388 const SC one = Teuchos::ScalarTraits<SC>::one ();
389
390 // V1 = B - A*X
391 Tpetra::deep_copy (*V1_, B);
392 A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);
393
394 // W := alpha * D_inv * V1 + beta * W
395 W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
396
397 // X := X + W
398 X.update (STS::one(), W, STS::one());
399}
400
401template<class TpetraOperatorType>
402void
403ChebyshevKernel<TpetraOperatorType>::
404fusedCase (vector_type& W,
405 const SC& alpha,
406 vector_type& D_inv,
407 vector_type& B,
408 const crs_matrix_type& A,
409 vector_type& X,
410 const SC& beta)
411{
412 vector_type& X_colMap = importVector (X);
413
414 using Impl::chebyshev_kernel_vector;
415 using STS = Teuchos::ScalarTraits<SC>;
416
417 auto A_lcl = A.getLocalMatrixDevice ();
418 //D_inv, B, X and W are all Vectors, so it's safe to take the first column only
419 auto Dinv_lcl = Kokkos::subview(D_inv.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
420 auto B_lcl = Kokkos::subview(B.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
421 auto X_domMap_lcl = Kokkos::subview(X.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
422 auto X_colMap_lcl = Kokkos::subview(X_colMap.getLocalViewDevice(Tpetra::Access::ReadOnly), Kokkos::ALL(), 0);
423
424 const bool do_X_update = !imp_.is_null ();
425 if (beta == STS::zero ()) {
426 auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::OverwriteAll), Kokkos::ALL(), 0);
427 chebyshev_kernel_vector (alpha, W_lcl, Dinv_lcl,
428 B_lcl, A_lcl,
429 X_colMap_lcl, X_domMap_lcl,
430 beta,
431 do_X_update);
432 }
433 else { // need to read _and_ write W if beta != 0
434 auto W_lcl = Kokkos::subview(W.getLocalViewDevice(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0);
435 chebyshev_kernel_vector (alpha, W_lcl, Dinv_lcl,
436 B_lcl, A_lcl,
437 X_colMap_lcl, X_domMap_lcl,
438 beta,
439 do_X_update);
440 }
441 if (!do_X_update)
442 X.update(STS::one (), W, STS::one ());
443}
444
445} // namespace Details
446} // namespace Ifpack2
447
448#define IFPACK2_DETAILS_CHEBYSHEVKERNEL_INSTANT(SC,LO,GO,NT) \
449 template class Ifpack2::Details::ChebyshevKernel<Tpetra::Operator<SC, LO, GO, NT> >;
450
451#endif // IFPACK2_DETAILS_CHEBYSHEVKERNEL_DEF_HPP
Compute scaled damped residual for Chebyshev.
Definition Ifpack2_Details_ChebyshevKernel_decl.hpp:45
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:41
Functor for computing W := alpha * D * (B - A*X) + beta * W and X := X+W.
Definition Ifpack2_Details_ChebyshevKernel_def.hpp:41