254 typedef typename MatrixType::scalar_type scalar_type;
255 typedef typename MatrixType::local_ordinal_type local_ordinal_type;
256 typedef typename MatrixType::global_ordinal_type global_ordinal_type;
257 typedef typename MatrixType::node_type node_type;
263 typedef typename Kokkos::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
265 typedef typename BlockTridiagScalarType<impl_scalar_type>::type btdm_scalar_type;
266 typedef typename Kokkos::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
277 typedef typename node_device_type::execution_space node_execution_space;
278 typedef typename node_device_type::memory_space node_memory_space;
280#if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_USE_CUDA_SPACE)
282 typedef node_execution_space execution_space;
283 typedef typename std::conditional<std::is_same<node_memory_space,Kokkos::CudaUVMSpace>::value,
285 node_memory_space>
::type memory_space;
286 typedef Kokkos::Device<execution_space,memory_space> device_type;
289 typedef node_execution_space execution_space;
290 typedef node_memory_space memory_space;
293 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_multivector_type;
294 typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> tpetra_map_type;
295 typedef Tpetra::Import<local_ordinal_type,global_ordinal_type,node_type> tpetra_import_type;
296 typedef Tpetra::RowMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_row_matrix_type;
297 typedef Tpetra::CrsMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_crs_matrix_type;
298 typedef Tpetra::CrsGraph<local_ordinal_type,global_ordinal_type,node_type> tpetra_crs_graph_type;
299 typedef Tpetra::BlockCrsMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_block_crs_matrix_type;
300 typedef typename tpetra_block_crs_matrix_type::little_block_type tpetra_block_access_view_type;
301 typedef Tpetra::BlockMultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_block_multivector_type;
302 typedef typename tpetra_block_crs_matrix_type::crs_graph_type::local_graph_device_type local_crs_graph_type;
307 template<
typename T,
int l>
using Vector = KB::Vector<T,l>;
308 template<
typename T>
using SIMD = KB::SIMD<T>;
309 template<
typename T,
typename M>
using DefaultVectorLength = KB::DefaultVectorLength<T,M>;
310 template<
typename T,
typename M>
using DefaultInternalVectorLength = KB::DefaultInternalVectorLength<T,M>;
312 static constexpr int vector_length = DefaultVectorLength<btdm_scalar_type,memory_space>::value;
313 static constexpr int internal_vector_length = DefaultInternalVectorLength<btdm_scalar_type,memory_space>::value;
321 typedef Kokkos::View<size_type**,device_type> size_type_2d_view;
322 typedef Kokkos::View<int64_t***,Kokkos::LayoutRight,device_type> i64_3d_view;
323 typedef Kokkos::View<local_ordinal_type*,device_type> local_ordinal_type_1d_view;
324 typedef Kokkos::View<local_ordinal_type**,device_type> local_ordinal_type_2d_view;
326 typedef Kokkos::View<impl_scalar_type*,device_type> impl_scalar_type_1d_view;
327 typedef Kokkos::View<impl_scalar_type*,node_device_type> impl_scalar_type_1d_view_tpetra;
330 typedef Kokkos::View<impl_scalar_type**,Kokkos::LayoutLeft,device_type> impl_scalar_type_2d_view;
331 typedef Kokkos::View<impl_scalar_type**,Kokkos::LayoutLeft,node_device_type> impl_scalar_type_2d_view_tpetra;
334 typedef Kokkos::View<vector_type*,device_type> vector_type_1d_view;
335 typedef Kokkos::View<vector_type***,Kokkos::LayoutRight,device_type> vector_type_3d_view;
336 typedef Kokkos::View<vector_type****,Kokkos::LayoutRight,device_type> vector_type_4d_view;
337 typedef Kokkos::View<internal_vector_type***,Kokkos::LayoutRight,device_type> internal_vector_type_3d_view;
338 typedef Kokkos::View<internal_vector_type****,Kokkos::LayoutRight,device_type> internal_vector_type_4d_view;
339 typedef Kokkos::View<internal_vector_type*****,Kokkos::LayoutRight,device_type> internal_vector_type_5d_view;
340 typedef Kokkos::View<btdm_scalar_type**,Kokkos::LayoutRight,device_type> btdm_scalar_type_2d_view;
341 typedef Kokkos::View<btdm_scalar_type***,Kokkos::LayoutRight,device_type> btdm_scalar_type_3d_view;
342 typedef Kokkos::View<btdm_scalar_type****,Kokkos::LayoutRight,device_type> btdm_scalar_type_4d_view;
343 typedef Kokkos::View<btdm_scalar_type*****,Kokkos::LayoutRight,device_type> btdm_scalar_type_5d_view;
355 using magnitude_type =
typename impl_type::magnitude_type;
359 int sweep_step_, sweep_step_upper_bound_;
360#ifdef HAVE_IFPACK2_MPI
361 MPI_Request mpi_request_;
364 magnitude_type work_[3];
367 NormManager() =
default;
368 NormManager(
const NormManager &b) =
default;
369 NormManager(
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm) {
371 sweep_step_upper_bound_ = 1;
372 collective_ = comm->getSize() > 1;
374#ifdef HAVE_IFPACK2_MPI
375 const auto mpi_comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
376 TEUCHOS_ASSERT( ! mpi_comm.is_null());
377 comm_ = *mpi_comm->getRawMpiComm();
380 const magnitude_type zero(0), minus_one(-1);
383 work_[2] = minus_one;
387 void setCheckFrequency(
const int sweep_step) {
388 TEUCHOS_TEST_FOR_EXCEPT_MSG(sweep_step < 1, "sweep step must be >= 1
");
389 sweep_step_upper_bound_ = sweep_step;
393 // Get the buffer into which to store rank-local squared norms.
394 magnitude_type* getBuffer() { return &work_[0]; }
396 // Call MPI_Iallreduce to find the global squared norms.
397 void ireduce(const int sweep, const bool force = false) {
398 if ( ! force && sweep % sweep_step_) return;
400 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce
", Ireduce);
403#ifdef HAVE_IFPACK2_MPI
404 auto send_data = &work_[1];
405 auto recv_data = &work_[0];
407# if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
408 MPI_Iallreduce(send_data, recv_data, 1,
409 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
410 MPI_SUM, comm_, &mpi_request_);
412 MPI_Allreduce (send_data, recv_data, 1,
413 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
420 // Check if the norm-based termination criterion is met. tol2 is the
421 // tolerance squared. Sweep is the sweep index. If not every iteration is
422 // being checked, this function immediately returns false. If a check must
423 // be done at this iteration, it waits for the reduction triggered by
424 // ireduce to complete, then checks the global norm against the tolerance.
425 bool checkDone (const int sweep, const magnitude_type tol2, const bool force = false) {
427 if (sweep <= 0) return false;
429 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone
", CheckDone);
431 TEUCHOS_ASSERT(sweep >= 1);
432 if ( ! force && (sweep - 1) % sweep_step_) return false;
434#ifdef HAVE_IFPACK2_MPI
435# if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
436 MPI_Wait(&mpi_request_, MPI_STATUS_IGNORE);
446 r_val = (work_[0] < tol2*work_[2]);
450 const auto adjusted_sweep_step = 2*sweep_step_;
451 if (adjusted_sweep_step < sweep_step_upper_bound_) {
452 sweep_step_ = adjusted_sweep_step;
454 sweep_step_ = sweep_step_upper_bound_;
459 // After termination has occurred, finalize the norms for use in
460 // get_norms{0,final}.
462 work_[0] = std::sqrt(work_[0]); // after converged
464 work_[2] = std::sqrt(work_[2]); // first norm
465 // if work_[2] is minus one, then norm is not requested.
468 // Report norms to the caller.
469 const magnitude_type getNorms0 () const { return work_[2]; }
470 const magnitude_type getNormsFinal () const { return work_[0]; }