Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_idot.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_IDOT_HPP
11#define TPETRA_DETAILS_IDOT_HPP
12
28
29#include "Tpetra_iallreduce.hpp"
30#include "Tpetra_MultiVector.hpp"
31#include "Tpetra_Vector.hpp"
32#include "Teuchos_CommHelpers.hpp"
33#include "KokkosBlas1_dot.hpp"
34#include <stdexcept>
35#include <sstream>
36
37namespace Tpetra {
38namespace Details {
39
40// Temporary helper to get read-only view from multivector in requested space.
41// TODO: when https://github.com/kokkos/kokkos/issues/3850 is resolved,
42// remove this and just use templated getLocalView<Device>(ReadOnly).
43template<typename MV>
44struct GetReadOnly
45{
46 using DevView = typename MV::dual_view_type::t_dev::const_type;
47 using HostView = typename MV::dual_view_type::t_host::const_type;
48
49 template<typename exec_space>
50 static DevView get(const MV& x, typename std::enable_if<std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
51 {
52 return x.getLocalViewDevice(Tpetra::Access::ReadOnly);
53 }
54
55 template<typename exec_space>
56 static HostView get(const MV& x, typename std::enable_if<!std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
57 {
58 return x.getLocalViewHost(Tpetra::Access::ReadOnly);
59 }
60};
61
63
64template<class MV, class ResultView, bool runOnDevice>
65void idotLocal(const ResultView& localResult,
66 const MV& X,
67 const MV& Y)
68{
69 using pair_type = Kokkos::pair<size_t, size_t>;
70 using exec_space = typename std::conditional<runOnDevice, typename MV::execution_space, Kokkos::DefaultHostExecutionSpace>::type;
71 //if the execution space can access localResult, use it directly. Otherwise need to make a copy.
72 static_assert(Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible,
73 "idotLocal: Execution space must be able to access localResult");
74 //If Dot executes on Serial, it requires the result to be HostSpace. If localResult is CudaUVMSpace,
75 //we can just treat it like HostSpace.
76 Kokkos::View<typename ResultView::data_type, typename exec_space::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
77 localResultUnmanaged(localResult.data(), localResult.extent(0));
78 const size_t numRows = X.getLocalLength ();
79 const pair_type rowRange (0, numRows);
80 const size_t X_numVecs = X.getNumVectors ();
81 const size_t Y_numVecs = Y.getNumVectors ();
82 const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
83 // Check compatibility of number of columns; allow special cases of
84 // a multivector dot a single vector, or vice versa.
85 if (X_numVecs != Y_numVecs &&
86 X_numVecs != size_t (1) &&
87 Y_numVecs != size_t (1)) {
88 std::ostringstream os;
89 os << "Tpetra::idot: X.getNumVectors() = " << X_numVecs
90 << " != Y.getNumVectors() = " << Y_numVecs
91 << ", but neither is 1.";
92 throw std::invalid_argument (os.str ());
93 }
94 auto X_lcl = GetReadOnly<MV>::template get<exec_space>(X);
95 auto Y_lcl = GetReadOnly<MV>::template get<exec_space>(Y);
96 //Can the multivector dot kernel be used?
97 bool useMVDot = X.isConstantStride() && Y.isConstantStride() && X_numVecs == Y_numVecs;
98 if(useMVDot)
99 {
100 if (numVecs == size_t (1)) {
101 auto X_lcl_1d = Kokkos::subview (X_lcl, rowRange, 0);
102 auto Y_lcl_1d = Kokkos::subview (Y_lcl, rowRange, 0);
103 auto result_0d = Kokkos::subview (localResultUnmanaged, 0);
104 KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
105 }
106 else {
107 auto X_lcl_2d = Kokkos::subview (X_lcl, rowRange, pair_type (0, X_numVecs));
108 auto Y_lcl_2d = Kokkos::subview (Y_lcl, rowRange, pair_type (0, Y_numVecs));
109 KokkosBlas::dot (localResultUnmanaged, X_lcl_2d, Y_lcl_2d);
110 }
111 }
112 else
113 {
114 auto XWhichVectors = Tpetra::getMultiVectorWhichVectors(X);
115 auto YWhichVectors = Tpetra::getMultiVectorWhichVectors(Y);
116 //Need to compute each dot individually, using 1D subviews of X_lcl and Y_lcl
117 for(size_t vec = 0; vec < numVecs; vec++) {
118 //Get the Vectors for each column of X and Y that will be dotted together.
119 //Note: "which vectors" is not populated for constant stride MVs (but it's the identity mapping)
120 size_t Xj = (numVecs == X_numVecs) ? vec : 0;
121 Xj = X.isConstantStride() ? Xj : XWhichVectors[Xj];
122 size_t Yj = (numVecs == Y_numVecs) ? vec : 0;
123 Yj = Y.isConstantStride() ? Yj : YWhichVectors[Yj];
124 auto Xcol = Kokkos::subview(X_lcl, rowRange, Xj);
125 auto Ycol = Kokkos::subview(Y_lcl, rowRange, Yj);
126
127 //Compute the rank-1 dot product, and place the result in an element of localResult
128 KokkosBlas::dot(Kokkos::subview(localResultUnmanaged, vec), Xcol, Ycol);
129 }
130 }
131}
132
133//Helper to avoid extra instantiations of KokkosBlas::dot and iallreduce.
134template<typename MV, typename ResultView>
135struct IdotHelper
136{
137 using dot_type = typename MV::dot_type;
138
139 //First version: use result directly
140 template<typename exec_space>
141 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
142 const ResultView& globalResult, const MV& X, const MV& Y,
143 typename std::enable_if<Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
144
145 {
146 constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
147 idotLocal<MV, ResultView, runOnDevice>(globalResult, X, Y);
148 //Fence because we're giving result of device kernel to MPI
149 if(runOnDevice)
150 exec_space().fence();
151 auto comm = X.getMap()->getComm();
152 return iallreduce(globalResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
153 }
154
155 //Second version: use a temporary local result, because exec_space can't write to globalResult
156 template<typename exec_space>
157 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
158 const ResultView& globalResult, const MV& X, const MV& Y,
159 typename std::enable_if<!Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
160 {
161 constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
162 Kokkos::View<dot_type*, typename exec_space::memory_space> localResult(Kokkos::ViewAllocateWithoutInitializing("idot:localResult"), X.getNumVectors());
164 //Fence because we're giving result of device kernel to MPI
165 if(runOnDevice)
166 exec_space().fence();
167 auto comm = X.getMap()->getComm();
168 return iallreduce(localResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
169 }
170
171};
172
173
176template<class MV, class ResultView>
177std::shared_ptr< ::Tpetra::Details::CommRequest>
178idotImpl(const ResultView& globalResult,
179 const MV& X,
180 const MV& Y)
181{
182 static_assert(std::is_same<typename ResultView::non_const_value_type, typename MV::dot_type>::value,
183 "Tpetra::idot: result view's element type must match MV::dot_type");
184
185 // Execution space to use for dot kernel(s) is whichever has access to up-to-date X.
186 if(X.need_sync_device())
187 {
188 //run on host.
189 return IdotHelper<MV, ResultView>::template run<Kokkos::DefaultHostExecutionSpace>(globalResult, X, Y);
190 }
191 else
192 {
193 //run on device.
194 return IdotHelper<MV, ResultView>::template run<typename MV::execution_space>(globalResult, X, Y);
195 }
196}
197} // namespace Details
198
199//
200// SKIP DOWN TO HERE
201//
202
255template<class SC, class LO, class GO, class NT>
256std::shared_ptr< ::Tpetra::Details::CommRequest>
257idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
258 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
259 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
260{
261 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
262 const size_t X_numVecs = X.getNumVectors ();
263 const size_t Y_numVecs = Y.getNumVectors ();
264 const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
265 Kokkos::View<dot_type*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
266 resultView(resultRaw, numVecs);
267 return Details::idotImpl(resultView, X, Y);
268}
269
332template<class SC, class LO, class GO, class NT>
333std::shared_ptr< ::Tpetra::Details::CommRequest>
334idot (const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
335 typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
336 const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
337 const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
338{
339 return Details::idotImpl(result, X, Y);
340}
341
383template<class SC, class LO, class GO, class NT>
384std::shared_ptr< ::Tpetra::Details::CommRequest>
385idot (const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
386 typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
387 const ::Tpetra::Vector<SC, LO, GO, NT>& X,
388 const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
389{
390 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
391 using result_device_t = typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type;
392 Kokkos::View<dot_type*, result_device_t, Kokkos::MemoryTraits<Kokkos::Unmanaged>> result1D(result.data(), 1);
393 return Details::idotImpl(result1D, X, Y);
394}
395
396} // namespace Tpetra
397
398#endif // TPETRA_DETAILS_IDOT_HPP
Declaration of Tpetra::iallreduce.
Nonmember function that computes a residual Computes R = B - A * X.
std::shared_ptr< ::Tpetra::Details::CommRequest > idotImpl(const ResultView &globalResult, const MV &X, const MV &Y)
Internal (common) version of idot, a global dot product that uses a non-blocking MPI reduction.
void idotLocal(const ResultView &localResult, const MV &X, const MV &Y)
Compute dot product locally. Where the kernel runs controlled by runOnDevice.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename ::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs,...