Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
11#define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
12
20
22#include "Tpetra_RowGraph.hpp"
23#include "Tpetra_CrsGraph.hpp"
24#include "Tpetra_RowMatrix.hpp"
25#include "Tpetra_Vector.hpp"
26
27namespace Tpetra {
28namespace Details {
29
30// Work-around for #499: Implementation of one-argument (no offsets)
31// getLocalDiagCopy for the NOT fill-complete case.
32//
33// NOTE (mfh 18 Jul 2016) This calls functions that are NOT GPU device
34// functions! Thus, we do NOT use KOKKOS_INLINE_FUNCTION or
35// KOKKOS_FUNCTION here, because those attempt to mark the functions
36// they modify as CUDA device functions. This functor is ONLY for
37// non-CUDA execution spaces!
38template<class SC, class LO, class GO, class NT>
39class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
40public:
41 typedef ::Tpetra::RowMatrix<SC, LO, GO, NT> row_matrix_type;
42 typedef ::Tpetra::Vector<SC, LO, GO, NT> vec_type;
43
44 typedef typename vec_type::impl_scalar_type IST;
45 // The output Vector determines the execution space.
46
47private:
48 typedef typename vec_type::dual_view_type::t_host::execution_space host_execution_space;
49 typedef typename vec_type::map_type map_type;
50
51 static bool
52 graphIsSorted (const row_matrix_type& A)
53 {
54 using Teuchos::RCP;
55 using Teuchos::rcp_dynamic_cast;
56 typedef Tpetra::CrsGraph<LO, GO, NT> crs_graph_type;
57 typedef Tpetra::RowGraph<LO, GO, NT> row_graph_type;
58
59 // We conservatively assume not sorted. RowGraph lacks an
60 // "isSorted" predicate, so we can't know for sure unless the cast
61 // to CrsGraph succeeds.
62 bool sorted = false;
63
64 RCP<const row_graph_type> G_row = A.getGraph ();
65 if (! G_row.is_null ()) {
66 RCP<const crs_graph_type> G_crs =
67 rcp_dynamic_cast<const crs_graph_type> (G_row);
68 if (! G_crs.is_null ()) {
69 sorted = G_crs->isSorted ();
70 }
71 }
72
73 return sorted;
74 }
75
76public:
77 // lclNumErrs [out] Total count of errors on this process.
78 GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor (LO& lclNumErrs,
79 vec_type& diag,
80 const row_matrix_type& A) :
81 A_ (A),
82 lclRowMap_ (*A.getRowMap ()),
83 lclColMap_ (*A.getColMap ()),
84 sorted_ (graphIsSorted (A))
85 {
86 const LO lclNumRows = static_cast<LO> (diag.getLocalLength ());
87 {
88 const LO matLclNumRows =
89 static_cast<LO> (lclRowMap_.getLocalNumElements ());
90 TEUCHOS_TEST_FOR_EXCEPTION
91 (lclNumRows != matLclNumRows, std::invalid_argument,
92 "diag.getLocalLength() = " << lclNumRows << " != "
93 "A.getRowMap()->getLocalNumElements() = " << matLclNumRows << ".");
94 }
95
96 // Side effects start below this point.
97 D_lcl_ = diag.getLocalViewHost(Access::OverwriteAll);
98 D_lcl_1d_ = Kokkos::subview (D_lcl_, Kokkos::ALL (), 0);
99
100 Kokkos::RangePolicy<host_execution_space, LO> range (0, lclNumRows);
101 lclNumErrs = 0;
102 Kokkos::parallel_reduce (range, *this, lclNumErrs);
103
104 // sync changes back to device, since the user doesn't know that
105 // we had to run on host.
106 //diag.template sync<typename device_type::memory_space> ();
107 }
108
109 void operator () (const LO& lclRowInd, LO& errCount) const {
110 using KokkosSparse::findRelOffset;
111
112 D_lcl_1d_(lclRowInd) = Kokkos::ArithTraits<IST>::zero ();
113 const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
114 const LO lclColInd = lclColMap_.getLocalElement (gblInd);
115
116 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
117 errCount++;
118 }
119 else { // row index is also in the column Map on this process
120 typename row_matrix_type::local_inds_host_view_type lclColInds;
121 typename row_matrix_type::values_host_view_type curVals;
122 A_.getLocalRowView(lclRowInd, lclColInds, curVals);
123 LO numEnt = lclColInds.extent(0);
124 // The search hint is always zero, since we only call this
125 // once per row of the matrix.
126 const LO hint = 0;
127 const LO offset =
128 findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
129 if (offset == numEnt) { // didn't find the diagonal column index
130 errCount++;
131 }
132 else {
133 D_lcl_1d_(lclRowInd) = curVals[offset];
134 }
135 }
136 }
137
138private:
139 const row_matrix_type& A_;
140 map_type lclRowMap_;
141 map_type lclColMap_;
142 typename vec_type::dual_view_type::t_host D_lcl_;
143 decltype (Kokkos::subview (D_lcl_, Kokkos::ALL (), 0)) D_lcl_1d_;
144 const bool sorted_;
145};
146
147
148template<class SC, class LO, class GO, class NT>
149LO
151 const ::Tpetra::RowMatrix<SC, LO, GO, NT>& A,
152 const bool debug)
153{
155 using Teuchos::outArg;
156 using Teuchos::REDUCE_MIN;
157 using Teuchos::reduceAll;
158 typedef GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC,
159 LO, GO, NT> functor_type;
160
161 // The functor's constructor does error checking and executes the
162 // thread-parallel kernel.
163
164 LO lclNumErrs = 0;
165
166 if (debug) {
167 int lclSuccess = 1;
168 int gblSuccess = 0;
169 std::ostringstream errStrm;
170 Teuchos::RCP<const Teuchos::Comm<int> > commPtr = A.getComm ();
171 if (commPtr.is_null ()) {
172 return lclNumErrs; // this process does not participate
173 }
174 const Teuchos::Comm<int>& comm = *commPtr;
175
176 try {
177 functor_type functor (lclNumErrs, diag, A);
178 }
179 catch (std::exception& e) {
180 lclSuccess = -1;
181 errStrm << "Process " << A.getComm ()->getRank () << ": "
182 << e.what () << std::endl;
183 }
184 if (lclNumErrs != 0) {
185 lclSuccess = 0;
186 }
187
188 reduceAll<int, int> (comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
189 if (gblSuccess == -1) {
190 if (comm.getRank () == 0) {
191 // We gather into std::cerr, rather than using an
192 // std::ostringstream, because there might be a lot of MPI
193 // processes. It could take too much memory to gather all the
194 // messages to Process 0 before printing. gathervPrint gathers
195 // and prints one message at a time, thus saving memory. I
196 // don't want to run out of memory while trying to print an
197 // error message; that would hide the real problem.
198 std::cerr << "getLocalDiagCopyWithoutOffsetsNotFillComplete threw an "
199 "exception on one or more MPI processes in the matrix's comunicator."
200 << std::endl;
201 }
202 gathervPrint (std::cerr, errStrm.str (), comm);
203 // Don't need to print anything here, since we've already
204 // printed to std::cerr above.
205 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "");
206 }
207 else if (gblSuccess == 0) {
208 TEUCHOS_TEST_FOR_EXCEPTION
209 (gblSuccess != 1, std::runtime_error,
210 "getLocalDiagCopyWithoutOffsetsNotFillComplete failed on "
211 "one or more MPI processes in the matrix's communicator.");
212 }
213 }
214 else { // ! debug
215 functor_type functor (lclNumErrs, diag, A);
216 }
217
218 return lclNumErrs;
219}
220
221} // namespace Details
222} // namespace Tpetra
223
224// Explicit template instantiation macro for
225// getLocalDiagCopyWithoutOffsetsNotFillComplete. NOT FOR USERS!!!
226// Must be used inside the Tpetra namespace.
227#define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT( SCALAR, LO, GO, NODE ) \
228 template LO \
229 Details::getLocalDiagCopyWithoutOffsetsNotFillComplete< SCALAR, LO, GO, NODE > \
230 ( ::Tpetra::Vector< SCALAR, LO, GO, NODE >& diag, \
231 const ::Tpetra::RowMatrix< SCALAR, LO, GO, NODE >& A, \
232 const bool debug);
233
234#endif // TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
Declaration of a function that prints strings from each process.
typename device_type::execution_space execution_space
A distributed dense vector.
base_type::impl_scalar_type impl_scalar_type
Nonmember function that computes a residual Computes R = B - A * X.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.