Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_allReduceView.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_ALLREDUCEVIEW_HPP
11#define TPETRA_DETAILS_ALLREDUCEVIEW_HPP
12
15#include "Kokkos_Core.hpp"
16#include "Teuchos_CommHelpers.hpp"
17#include "Tpetra_Details_temporaryViewUtils.hpp"
18#include <limits>
19#include <type_traits>
20
23
24namespace Tpetra {
25namespace Details {
26
27template<typename InputViewType, typename OutputViewType>
28static void
29allReduceRawContiguous (const OutputViewType& output,
30 const InputViewType& input,
31 const Teuchos::Comm<int>& comm)
32{
33 using Teuchos::outArg;
34 using Teuchos::REDUCE_SUM;
35 using Teuchos::reduceAll;
36 using ValueType = typename InputViewType::non_const_value_type;
37 size_t count = input.span();
38 TEUCHOS_ASSERT( count <= size_t (INT_MAX) );
39 if(isInterComm(comm) && input.data() == output.data())
40 {
41 //Can't do in-place collective on an intercomm,
42 //so use a separate copy as the input.
43 typename InputViewType::array_layout layout(input.extent(0), input.extent(1), input.extent(2), input.extent(3), input.extent(4), input.extent(5), input.extent(6), input.extent(7));
44 Kokkos::View<typename InputViewType::non_const_data_type, typename InputViewType::array_layout, typename InputViewType::device_type>
45 tempInput(Kokkos::ViewAllocateWithoutInitializing("tempInput"), layout);
46 // DEEP_COPY REVIEW - This could be either DEVICE-TO-DEVICE or HOST-TO-HOST
47 // Either way, MPI is called right afterwards, meaning we'd need a sync on device
48 Kokkos::deep_copy(tempInput, input);
49 reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
50 tempInput.data(), output.data());
51 }
52 else
53 reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
54 input.data(), output.data());
55}
56
60template<class InputViewType, class OutputViewType>
61static void
62allReduceView (const OutputViewType& output,
63 const InputViewType& input,
64 const Teuchos::Comm<int>& comm)
65{
66
67 // using execution_space = typename OutputViewType::execution_space;
68 const bool viewsAlias = output.data () == input.data ();
69 if (comm.getSize () == 1) {
70 if (! viewsAlias) {
71 // InputViewType and OutputViewType can't be AnonymousSpace
72 // Views, because deep_copy needs to know their memory spaces.
73 // DEEP_COPY REVIEW - NOT TESTED
74 Kokkos::deep_copy (output, input);
75 }
76 return;
77 }
78
79 // we must ensure MPI can handle the pointers we pass it
80 // if GPUAware, we are done
81 // otherwise, if the views use GPUs, then we should copy them
82 using Layout = typename TempView::UnifiedContiguousLayout<InputViewType, OutputViewType>::type;
83 //if one or both is already in the correct layout, toLayout returns the same view
84 auto inputContig = TempView::toLayout<InputViewType, Layout>(input);
85 auto outputContig = TempView::toLayout<InputViewType, Layout>(output);
87 {
88 allReduceRawContiguous(outputContig, inputContig, comm);
89
90 }
91 else
92 {
93 auto inputMPI = TempView::toMPISafe<decltype(inputContig), false>(inputContig);
94 auto outputMPI = TempView::toMPISafe<decltype(outputContig), false>(outputContig);
95 allReduceRawContiguous(outputMPI, inputMPI, comm);
96 // DEEP_COPY REVIEW - Could be either
97 Kokkos::deep_copy(outputContig, outputMPI);
98 }
99 // DEEP_COPY REVIEW - Could be either
100 Kokkos::deep_copy(output, outputContig);
101}
102
103} // namespace Details
104} // namespace Tpetra
105
106#endif // TPETRA_DETAILS_ALLREDUCEVIEW_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::isInterComm.
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
Nonmember function that computes a residual Computes R = B - A * X.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator.
Namespace Tpetra contains the class and methods constituting the Tpetra library.