Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_BlockHelper.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_BLOCKHELPER_IMPL_HPP
11#define IFPACK2_BLOCKHELPER_IMPL_HPP
12
13#include "Ifpack2_BlockHelper_Timers.hpp"
14
15namespace Ifpack2 {
16
17 namespace BlockHelperDetails {
18
19 namespace KB = KokkosBatched;
20
24 using do_not_initialize_tag = Kokkos::ViewAllocateWithoutInitializing;
25
26 template <typename MemoryTraitsType, Kokkos::MemoryTraitsFlags flag>
27 using MemoryTraits = Kokkos::MemoryTraits<MemoryTraitsType::is_unmanaged |
28 MemoryTraitsType::is_random_access |
29 flag>;
30
31 template <typename ViewType>
32 using Unmanaged = Kokkos::View<typename ViewType::data_type,
33 typename ViewType::array_layout,
34 typename ViewType::device_type,
35 MemoryTraits<typename ViewType::memory_traits,Kokkos::Unmanaged> >;
36 template <typename ViewType>
37 using Atomic = Kokkos::View<typename ViewType::data_type,
38 typename ViewType::array_layout,
39 typename ViewType::device_type,
40 MemoryTraits<typename ViewType::memory_traits,Kokkos::Atomic> >;
41 template <typename ViewType>
42 using Const = Kokkos::View<typename ViewType::const_data_type,
43 typename ViewType::array_layout,
44 typename ViewType::device_type,
45 typename ViewType::memory_traits>;
46 template <typename ViewType>
47 using ConstUnmanaged = Const<Unmanaged<ViewType> >;
48
49 template <typename ViewType>
50 using AtomicUnmanaged = Atomic<Unmanaged<ViewType> >;
51
52 template <typename ViewType>
53 using Unmanaged = Kokkos::View<typename ViewType::data_type,
54 typename ViewType::array_layout,
55 typename ViewType::device_type,
56 MemoryTraits<typename ViewType::memory_traits,Kokkos::Unmanaged> >;
57
58
59 template <typename ViewType>
60 using Scratch = Kokkos::View<typename ViewType::data_type,
61 typename ViewType::array_layout,
62 typename ViewType::execution_space::scratch_memory_space,
63 MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
64
68 template<typename LayoutType> struct TpetraLittleBlock;
69 template<> struct TpetraLittleBlock<Kokkos::LayoutLeft> {
70 template<typename T> KOKKOS_INLINE_FUNCTION
71 static T getFlatIndex(const T i, const T j, const T blksize) { return i+j*blksize; }
72 };
73 template<> struct TpetraLittleBlock<Kokkos::LayoutRight> {
74 template<typename T> KOKKOS_INLINE_FUNCTION
75 static T getFlatIndex(const T i, const T j, const T blksize) { return i*blksize+j; }
76 };
77
81 template<typename T> struct BlockTridiagScalarType { typedef T type; };
82#if defined(IFPACK2_BLOCKHELPER_USE_SMALL_SCALAR_FOR_BLOCKTRIDIAG)
83 template<> struct BlockTridiagScalarType<double> { typedef float type; };
84 //template<> struct SmallScalarType<Kokkos::complex<double> > { typedef Kokkos::complex<float> type; };
85#endif
86
90 template<typename T> struct is_cuda { enum : bool { value = false }; };
91#if defined(KOKKOS_ENABLE_CUDA)
92 template<> struct is_cuda<Kokkos::Cuda> { enum : bool { value = true }; };
93#endif
94
98 template<typename T> struct is_hip { enum : bool { value = false }; };
99#if defined(KOKKOS_ENABLE_HIP)
100 template<> struct is_hip<Kokkos::HIP> { enum : bool { value = true }; };
101#endif
102
106 template<typename T> struct is_sycl { enum : bool { value = false }; };
107#if defined(KOKKOS_ENABLE_SYCL)
108 template<> struct is_sycl<Kokkos::Experimental::SYCL> { enum : bool { value = true }; };
109#endif
110
111 template<typename T> struct is_device { enum : bool { value = is_cuda<T>::value || is_hip<T>::value || is_sycl<T>::value }; };
112
113
117 template<typename T>
119 static void createInstance(T &exec_instance) {
120 exec_instance = T();
121 }
122#if defined(KOKKOS_ENABLE_CUDA)
123 static void createInstance(const cudaStream_t &s, T &exec_instance) {
124 exec_instance = T();
125 }
126#endif
127 };
128
129#if defined(KOKKOS_ENABLE_CUDA)
130 template<>
131 struct ExecutionSpaceFactory<Kokkos::Cuda> {
132 static void createInstance(Kokkos::Cuda &exec_instance) {
133 exec_instance = Kokkos::Cuda();
134 }
135 static void createInstance(const cudaStream_t &s, Kokkos::Cuda &exec_instance) {
136 exec_instance = Kokkos::Cuda(s);
137 }
138 };
139#endif
140
141#if defined(KOKKOS_ENABLE_HIP)
142 template<>
143 struct ExecutionSpaceFactory<Kokkos::HIP> {
144 static void createInstance(Kokkos::HIP &exec_instance) {
145 exec_instance = Kokkos::HIP();
146 }
147 };
148#endif
149
150#if defined(KOKKOS_ENABLE_SYCL)
151 template<>
152 struct ExecutionSpaceFactory<Kokkos::Experimental::SYCL> {
153 static void createInstance(Kokkos::Experimental::SYCL &exec_instance) {
154 exec_instance = Kokkos::Experimental::SYCL();
155 }
156 };
157#endif
158
159#if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_ENABLE_PROFILE)
160#define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN \
161 KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStart());
162
163#define IFPACK2_BLOCKHELPER_PROFILER_REGION_END \
164 { KOKKOS_IMPL_CUDA_SAFE_CALL( cudaProfilerStop() ); }
165#else
167#define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN
168#define IFPACK2_BLOCKHELPER_PROFILER_REGION_END
169#endif
170
171
175 template<typename CommPtrType>
176 std::string get_msg_prefix (const CommPtrType &comm) {
177 const auto rank = comm->getRank();
178 const auto nranks = comm->getSize();
179 std::stringstream ss;
180 ss << "Rank " << rank << " of " << nranks << ": ";
181 return ss.str();
182 }
183
187 template<typename T, int N>
188 struct ArrayValueType {
189 T v[N];
190 KOKKOS_INLINE_FUNCTION
191 ArrayValueType() {
192 for (int i=0;i<N;++i)
193 this->v[i] = 0;
194 }
195 KOKKOS_INLINE_FUNCTION
196 ArrayValueType(const ArrayValueType &b) {
197 for (int i=0;i<N;++i)
198 this->v[i] = b.v[i];
199 }
200 };
201 template<typename T, int N>
202 static
203 KOKKOS_INLINE_FUNCTION
204 void
205 operator+=(ArrayValueType<T,N> &a,
206 const ArrayValueType<T,N> &b) {
207 for (int i=0;i<N;++i)
208 a.v[i] += b.v[i];
209 }
210
214 template<typename T, int N, typename ExecSpace>
215 struct SumReducer {
216 typedef SumReducer reducer;
217 typedef ArrayValueType<T,N> value_type;
218 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
219 value_type *value;
220
221 KOKKOS_INLINE_FUNCTION
222 SumReducer(value_type &val) : value(&val) {}
223
224 KOKKOS_INLINE_FUNCTION
225 void join(value_type &dst, value_type const &src) const {
226 for (int i=0;i<N;++i)
227 dst.v[i] += src.v[i];
228 }
229 KOKKOS_INLINE_FUNCTION
230 void init(value_type &val) const {
231 for (int i=0;i<N;++i)
232 val.v[i] = Kokkos::reduction_identity<T>::sum();
233 }
234 KOKKOS_INLINE_FUNCTION
235 value_type& reference() {
236 return *value;
237 }
238 KOKKOS_INLINE_FUNCTION
239 result_view_type view() const {
240 return result_view_type(value);
241 }
242 };
243
244
248 template <typename MatrixType>
249 struct ImplType {
253 typedef size_t size_type;
254 typedef typename MatrixType::scalar_type scalar_type;
255 typedef typename MatrixType::local_ordinal_type local_ordinal_type;
256 typedef typename MatrixType::global_ordinal_type global_ordinal_type;
257 typedef typename MatrixType::node_type node_type;
258
262 typedef typename Kokkos::Details::ArithTraits<scalar_type>::val_type impl_scalar_type;
263 typedef typename Kokkos::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
264
265 typedef typename BlockTridiagScalarType<impl_scalar_type>::type btdm_scalar_type;
266 typedef typename Kokkos::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
267
271 typedef Kokkos::DefaultHostExecutionSpace host_execution_space;
272
276 typedef typename node_type::device_type node_device_type;
277 typedef typename node_device_type::execution_space node_execution_space;
278 typedef typename node_device_type::memory_space node_memory_space;
279
280#if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_USE_CUDA_SPACE)
282 typedef node_execution_space execution_space;
283 typedef typename std::conditional<std::is_same<node_memory_space,Kokkos::CudaUVMSpace>::value,
284 Kokkos::CudaSpace,
285 node_memory_space>::type memory_space;
286 typedef Kokkos::Device<execution_space,memory_space> device_type;
287#else
288 typedef node_device_type device_type;
289 typedef node_execution_space execution_space;
290 typedef node_memory_space memory_space;
291#endif
292
293 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_multivector_type;
294 typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> tpetra_map_type;
295 typedef Tpetra::Import<local_ordinal_type,global_ordinal_type,node_type> tpetra_import_type;
296 typedef Tpetra::RowMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_row_matrix_type;
297 typedef Tpetra::CrsMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_crs_matrix_type;
298 typedef Tpetra::CrsGraph<local_ordinal_type,global_ordinal_type,node_type> tpetra_crs_graph_type;
299 typedef Tpetra::BlockCrsMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_block_crs_matrix_type;
300 typedef typename tpetra_block_crs_matrix_type::little_block_type tpetra_block_access_view_type;
301 typedef Tpetra::BlockMultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_block_multivector_type;
302 typedef typename tpetra_block_crs_matrix_type::crs_graph_type::local_graph_device_type local_crs_graph_type;
303
307 template<typename T, int l> using Vector = KB::Vector<T,l>;
308 template<typename T> using SIMD = KB::SIMD<T>;
309 template<typename T, typename M> using DefaultVectorLength = KB::DefaultVectorLength<T,M>;
310 template<typename T, typename M> using DefaultInternalVectorLength = KB::DefaultInternalVectorLength<T,M>;
311
312 static constexpr int vector_length = DefaultVectorLength<btdm_scalar_type,memory_space>::value;
313 static constexpr int internal_vector_length = DefaultInternalVectorLength<btdm_scalar_type,memory_space>::value;
314 typedef Vector<SIMD<btdm_scalar_type>,vector_length> vector_type;
315 typedef Vector<SIMD<btdm_scalar_type>,internal_vector_length> internal_vector_type;
316
320 typedef Kokkos::View<size_type*,device_type> size_type_1d_view;
321 typedef Kokkos::View<size_type**,device_type> size_type_2d_view;
322 typedef Kokkos::View<int64_t***,Kokkos::LayoutRight,device_type> i64_3d_view;
323 typedef Kokkos::View<local_ordinal_type*,device_type> local_ordinal_type_1d_view;
324 typedef Kokkos::View<local_ordinal_type**,device_type> local_ordinal_type_2d_view;
325 // tpetra block crs values
326 typedef Kokkos::View<impl_scalar_type*,device_type> impl_scalar_type_1d_view;
327 typedef Kokkos::View<impl_scalar_type*,node_device_type> impl_scalar_type_1d_view_tpetra;
328
329 // tpetra multivector values (layout left): may need to change the typename more explicitly
330 typedef Kokkos::View<impl_scalar_type**,Kokkos::LayoutLeft,device_type> impl_scalar_type_2d_view;
331 typedef Kokkos::View<impl_scalar_type**,Kokkos::LayoutLeft,node_device_type> impl_scalar_type_2d_view_tpetra;
332
333 // packed data always use layout right
334 typedef Kokkos::View<vector_type*,device_type> vector_type_1d_view;
335 typedef Kokkos::View<vector_type***,Kokkos::LayoutRight,device_type> vector_type_3d_view;
336 typedef Kokkos::View<vector_type****,Kokkos::LayoutRight,device_type> vector_type_4d_view;
337 typedef Kokkos::View<internal_vector_type***,Kokkos::LayoutRight,device_type> internal_vector_type_3d_view;
338 typedef Kokkos::View<internal_vector_type****,Kokkos::LayoutRight,device_type> internal_vector_type_4d_view;
339 typedef Kokkos::View<internal_vector_type*****,Kokkos::LayoutRight,device_type> internal_vector_type_5d_view;
340 typedef Kokkos::View<btdm_scalar_type**,Kokkos::LayoutRight,device_type> btdm_scalar_type_2d_view;
341 typedef Kokkos::View<btdm_scalar_type***,Kokkos::LayoutRight,device_type> btdm_scalar_type_3d_view;
342 typedef Kokkos::View<btdm_scalar_type****,Kokkos::LayoutRight,device_type> btdm_scalar_type_4d_view;
343 typedef Kokkos::View<btdm_scalar_type*****,Kokkos::LayoutRight,device_type> btdm_scalar_type_5d_view;
344 };
345
346
350 template<typename MatrixType>
351 struct NormManager {
352 public:
353 using impl_type = ImplType<MatrixType>;
354 using host_execution_space = typename impl_type::host_execution_space;
355 using magnitude_type = typename impl_type::magnitude_type;
356
357 private:
358 bool collective_;
359 int sweep_step_, sweep_step_upper_bound_;
360#ifdef HAVE_IFPACK2_MPI
361 MPI_Request mpi_request_;
362 MPI_Comm comm_;
363#endif
364 magnitude_type work_[3];
365
366 public:
367 NormManager() = default;
368 NormManager(const NormManager &b) = default;
369 NormManager(const Teuchos::RCP<const Teuchos::Comm<int> >& comm) {
370 sweep_step_ = 1;
371 sweep_step_upper_bound_ = 1;
372 collective_ = comm->getSize() > 1;
373 if (collective_) {
374#ifdef HAVE_IFPACK2_MPI
375 const auto mpi_comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
376 TEUCHOS_ASSERT( ! mpi_comm.is_null());
377 comm_ = *mpi_comm->getRawMpiComm();
378#endif
379 }
380 const magnitude_type zero(0), minus_one(-1);
381 work_[0] = zero;
382 work_[1] = zero;
383 work_[2] = minus_one;
384 }
385
386 // Check the norm every sweep_step sweeps.
387 void setCheckFrequency(const int sweep_step) {
388 TEUCHOS_TEST_FOR_EXCEPT_MSG(sweep_step < 1, "sweep step must be >= 1");
389 sweep_step_upper_bound_ = sweep_step;
390 sweep_step_ = 1;
391 }
392
393 // Get the buffer into which to store rank-local squared norms.
394 magnitude_type* getBuffer() { return &work_[0]; }
395
396 // Call MPI_Iallreduce to find the global squared norms.
397 void ireduce(const int sweep, const bool force = false) {
398 if ( ! force && sweep % sweep_step_) return;
399
400 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce", Ireduce);
401
402 work_[1] = work_[0];
403#ifdef HAVE_IFPACK2_MPI
404 auto send_data = &work_[1];
405 auto recv_data = &work_[0];
406 if (collective_) {
407# if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
408 MPI_Iallreduce(send_data, recv_data, 1,
409 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
410 MPI_SUM, comm_, &mpi_request_);
411# else
412 MPI_Allreduce (send_data, recv_data, 1,
413 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
414 MPI_SUM, comm_);
415# endif
416 }
417#endif
418 }
419
420 // Check if the norm-based termination criterion is met. tol2 is the
421 // tolerance squared. Sweep is the sweep index. If not every iteration is
422 // being checked, this function immediately returns false. If a check must
423 // be done at this iteration, it waits for the reduction triggered by
424 // ireduce to complete, then checks the global norm against the tolerance.
425 bool checkDone (const int sweep, const magnitude_type tol2, const bool force = false) {
426 // early return
427 if (sweep <= 0) return false;
428
429 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone", CheckDone);
430
431 TEUCHOS_ASSERT(sweep >= 1);
432 if ( ! force && (sweep - 1) % sweep_step_) return false;
433 if (collective_) {
434#ifdef HAVE_IFPACK2_MPI
435# if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
436 MPI_Wait(&mpi_request_, MPI_STATUS_IGNORE);
437# else
438 // Do nothing.
439# endif
440#endif
441 }
442 bool r_val = false;
443 if (sweep == 1) {
444 work_[2] = work_[0];
445 } else {
446 r_val = (work_[0] < tol2*work_[2]);
447 }
448
449 // adjust sweep step
450 const auto adjusted_sweep_step = 2*sweep_step_;
451 if (adjusted_sweep_step < sweep_step_upper_bound_) {
452 sweep_step_ = adjusted_sweep_step;
453 } else {
454 sweep_step_ = sweep_step_upper_bound_;
455 }
456 return r_val;
457 }
458
459 // After termination has occurred, finalize the norms for use in
460 // get_norms{0,final}.
461 void finalize () {
462 work_[0] = std::sqrt(work_[0]); // after converged
463 if (work_[2] >= 0)
464 work_[2] = std::sqrt(work_[2]); // first norm
465 // if work_[2] is minus one, then norm is not requested.
466 }
467
468 // Report norms to the caller.
469 const magnitude_type getNorms0 () const { return work_[2]; }
470 const magnitude_type getNormsFinal () const { return work_[0]; }
471 };
472
473 template<typename MatrixType>
474 void reduceVector(const ConstUnmanaged<typename BlockHelperDetails::ImplType<MatrixType>::impl_scalar_type_1d_view> zz,
475 /* */ typename BlockHelperDetails::ImplType<MatrixType>::magnitude_type *vals) {
476 IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN;
477 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ReduceVector", ReduceVector);
478
479 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
480 using local_ordinal_type = typename impl_type::local_ordinal_type;
481 using impl_scalar_type = typename impl_type::impl_scalar_type;
482#if 0
483 const auto norm2 = KokkosBlas::nrm1(zz);
484#else
485 impl_scalar_type norm2(0);
486 Kokkos::parallel_reduce
487 ("ReduceMultiVector::Device",
488 Kokkos::RangePolicy<typename impl_type::execution_space>(0,zz.extent(0)),
489 KOKKOS_LAMBDA(const local_ordinal_type &i, impl_scalar_type &update) {
490 update += zz(i);
491 }, norm2);
492#endif
493 vals[0] = Kokkos::ArithTraits<impl_scalar_type>::abs(norm2);
494
495 IFPACK2_BLOCKHELPER_PROFILER_REGION_END;
496 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename ImplType<MatrixType>::execution_space)
497 }
498
499 } // namespace BlockHelperDetails
500
501} // namespace Ifpack2
502
503#endif
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:41
Definition Ifpack2_BlockHelper.hpp:188
Definition Ifpack2_BlockHelper.hpp:81
Definition Ifpack2_BlockHelper.hpp:118
Definition Ifpack2_BlockHelper.hpp:249
Kokkos::View< size_type *, device_type > size_type_1d_view
Definition Ifpack2_BlockHelper.hpp:320
size_t size_type
Definition Ifpack2_BlockHelper.hpp:253
Kokkos::Details::ArithTraits< scalar_type >::val_type impl_scalar_type
Definition Ifpack2_BlockHelper.hpp:262
node_type::device_type node_device_type
Definition Ifpack2_BlockHelper.hpp:276
Kokkos::DefaultHostExecutionSpace host_execution_space
Definition Ifpack2_BlockHelper.hpp:271
KB::Vector< T, l > Vector
Definition Ifpack2_BlockHelper.hpp:307
Definition Ifpack2_BlockHelper.hpp:68
Definition Ifpack2_BlockHelper.hpp:90
Definition Ifpack2_BlockHelper.hpp:98
Definition Ifpack2_BlockHelper.hpp:106