Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_HIP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
12
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
14
15#ifdef HAVE_TPETRA_INST_HIP
16namespace Tpetra {
17namespace MMdetails {
18
19/*********************************************************************************************************/
20// MMM KernelWrappers for Partial Specialization to HIP
21template<class Scalar,
22 class LocalOrdinal,
23 class GlobalOrdinal,
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28 const LocalOrdinalViewType & Acol2Brow,
29 const LocalOrdinalViewType & Acol2Irow,
30 const LocalOrdinalViewType & Bcol2Ccol,
31 const LocalOrdinalViewType & Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36
37
38
39 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
40 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
41 const LocalOrdinalViewType & Acol2Brow,
42 const LocalOrdinalViewType & Acol2Irow,
43 const LocalOrdinalViewType & Bcol2Ccol,
44 const LocalOrdinalViewType & Icol2Ccol,
45 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
46 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
47 const std::string& label = std::string(),
48 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
49
50};
51
52// Jacobi KernelWrappers for Partial Specialization to HIP
53template<class Scalar,
54 class LocalOrdinal,
55 class GlobalOrdinal, class LocalOrdinalViewType>
56struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
57 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
59 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
66 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69
70 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
72 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
79 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82
83 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
85 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
86 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
87 const LocalOrdinalViewType & Acol2Brow,
88 const LocalOrdinalViewType & Acol2Irow,
89 const LocalOrdinalViewType & Bcol2Ccol,
90 const LocalOrdinalViewType & Icol2Ccol,
91 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
92 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
93 const std::string& label = std::string(),
94 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
95};
96
97
98/*********************************************************************************************************/
99// AB NewMatrix Kernel wrappers (KokkosKernels/HIP Version)
100template<class Scalar,
101 class LocalOrdinal,
102 class GlobalOrdinal,
103 class LocalOrdinalViewType>
104void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
106 const LocalOrdinalViewType & Acol2Brow,
107 const LocalOrdinalViewType & Acol2Irow,
108 const LocalOrdinalViewType & Bcol2Ccol,
109 const LocalOrdinalViewType & Icol2Ccol,
110 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
111 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
112 const std::string& label,
113 const Teuchos::RCP<Teuchos::ParameterList>& params) {
114
115#ifdef HAVE_TPETRA_MMM_TIMINGS
116 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
117 using Teuchos::TimeMonitor;
118 Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPWrapper")))));
119#endif
120 // Node-specific code
121 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
122 std::string nodename("HIP");
123
124 // Lots and lots of typedefs
125 using Teuchos::RCP;
127 typedef typename KCRS::device_type device_t;
128 typedef typename KCRS::StaticCrsGraphType graph_t;
129 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
130 using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
131 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
132 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
133 typedef typename KCRS::values_type::non_const_type scalar_view_t;
134 //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
135
136 // Options
137 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
138 std::string myalg("SPGEMM_KK_MEMORY");
139 if(!params.is_null()) {
140 if(params->isParameter("hip: algorithm"))
141 myalg = params->get("hip: algorithm",myalg);
142 if(params->isParameter("hip: team work size"))
143 team_work_size = params->get("hip: team work size",team_work_size);
144 }
145
146 // KokkosKernelsHandle
147 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
148 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
149 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
150 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
151 typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
152 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
153
154 // Grab the Kokkos::SparseCrsMatrices
155 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
156 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
157
158 c_lno_view_t Arowptr = Amat.graph.row_map,
159 Browptr = Bmat.graph.row_map;
160 const lno_nnz_view_t Acolind = Amat.graph.entries,
161 Bcolind = Bmat.graph.entries;
162 const scalar_view_t Avals = Amat.values,
163 Bvals = Bmat.values;
164
165 // Get the algorithm mode
166 std::string alg = nodename+std::string(" algorithm");
167 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
168 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
169 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
170
171 // Merge the B and Bimport matrices
172 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
173
174#ifdef HAVE_TPETRA_MMM_TIMINGS
175 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPCore"))));
176#endif
177
178 // Do the multiply on whatever we've got
179 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
180 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
181 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
182
183 // regardless of whether integer row ptrs are used, need to ultimately produce the row pointer, entries, and values of the expected types
184 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
185 lno_nnz_view_t entriesC;
186 scalar_view_t valuesC;
187
188 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
189 const bool useIntRowptrs =
190 irph.shouldUseIntRowptrs() &&
191 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
192
193 if (useIntRowptrs) {
194 IntKernelHandle kh;
195 kh.create_spgemm_handle(alg_enum);
196 kh.set_team_work_size(team_work_size);
197
198 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
199
200 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
201 auto Bint = irph.getIntRowptrMatrix(Bmerged);
202 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,false,Bint.graph.row_map,Bint.graph.entries,false, int_row_mapC);
203
204 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
205 if (c_nnz_size){
206 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
207 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
208 }
209 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,false,Bint.graph.row_map,Bint.graph.entries,Bint.values,false,int_row_mapC,entriesC,valuesC);
210 // transfer the integer rowptrs back to the correct rowptr type
211 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
212 kh.destroy_spgemm_handle();
213 } else {
214 KernelHandle kh;
215 kh.create_spgemm_handle(alg_enum);
216 kh.set_team_work_size(team_work_size);
217
218 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
219
220 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
221 if (c_nnz_size){
222 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
223 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
224 }
225
226 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
227 kh.destroy_spgemm_handle();
228 }
229
230#ifdef HAVE_TPETRA_MMM_TIMINGS
231 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPSort"))));
232#endif
233
234 // Sort & set values
235 if (params.is_null() || params->get("sort entries",true))
236 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
237 C.setAllValues(row_mapC,entriesC,valuesC);
238
239#ifdef HAVE_TPETRA_MMM_TIMINGS
240 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix HIPESFC"))));
241#endif
242
243 // Final Fillcomplete
244 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
245 labelList->set("Timer Label",label);
246 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
247 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
248 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
249}
250
251
252/*********************************************************************************************************/
253template<class Scalar,
254 class LocalOrdinal,
255 class GlobalOrdinal,
256 class LocalOrdinalViewType>
257void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
258 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
259 const LocalOrdinalViewType & targetMapToOrigRow_dev,
260 const LocalOrdinalViewType & targetMapToImportRow_dev,
261 const LocalOrdinalViewType & Bcol2Ccol_dev,
262 const LocalOrdinalViewType & Icol2Ccol_dev,
263 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
264 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
265 const std::string& label,
266 const Teuchos::RCP<Teuchos::ParameterList>& params) {
267
268 // FIXME: Right now, this is a cut-and-paste of the serial kernel
269 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
270
271#ifdef HAVE_TPETRA_MMM_TIMINGS
272 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
273 using Teuchos::TimeMonitor;
274 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
275 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
276#endif
277 using Teuchos::RCP;
278 using Teuchos::rcp;
279
280
281 // Lots and lots of typedefs
282 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
283 typedef typename KCRS::StaticCrsGraphType graph_t;
284 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
285 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
286 typedef typename KCRS::values_type::non_const_type scalar_view_t;
287
288 typedef Scalar SC;
289 typedef LocalOrdinal LO;
290 typedef GlobalOrdinal GO;
291 typedef Node NO;
292 typedef Map<LO,GO,NO> map_type;
293 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
295 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
296
297 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
298 // KDDKDD UVM Ideally, this function would run on device and use
299 // KDDKDD UVM KokkosKernels instead of this host implementation.
300 auto targetMapToOrigRow =
301 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
302 targetMapToOrigRow_dev);
303 auto targetMapToImportRow =
304 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305 targetMapToImportRow_dev);
306 auto Bcol2Ccol =
307 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
308 Bcol2Ccol_dev);
309 auto Icol2Ccol =
310 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
311 Icol2Ccol_dev);
312
313 // Sizes
314 RCP<const map_type> Ccolmap = C.getColMap();
315 size_t m = Aview.origMatrix->getLocalNumRows();
316 size_t n = Ccolmap->getLocalNumElements();
317
318 // Grab the Kokkos::SparseCrsMatrices & inner stuff
319 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
320 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
321 const KCRS & Cmat = C.getLocalMatrixHost();
322
323 c_lno_view_t Arowptr = Amat.graph.row_map,
324 Browptr = Bmat.graph.row_map,
325 Crowptr = Cmat.graph.row_map;
326 const lno_nnz_view_t Acolind = Amat.graph.entries,
327 Bcolind = Bmat.graph.entries,
328 Ccolind = Cmat.graph.entries;
329 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
330 scalar_view_t Cvals = Cmat.values;
331
332 c_lno_view_t Irowptr;
333 lno_nnz_view_t Icolind;
334 scalar_view_t Ivals;
335 if(!Bview.importMatrix.is_null()) {
336 auto lclB = Bview.importMatrix->getLocalMatrixHost();
337 Irowptr = lclB.graph.row_map;
338 Icolind = lclB.graph.entries;
339 Ivals = lclB.values;
340 }
341
342#ifdef HAVE_TPETRA_MMM_TIMINGS
343 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
344#endif
345
346 // Classic csr assembly (low memory edition)
347 // mfh 27 Sep 2016: The c_status array is an implementation detail
348 // of the local sparse matrix-matrix multiply routine.
349
350 // The status array will contain the index into colind where this entry was last deposited.
351 // c_status[i] < CSR_ip - not in the row yet
352 // c_status[i] >= CSR_ip - this is the entry where you can find the data
353 // We start with this filled with INVALID's indicating that there are no entries yet.
354 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
355 std::vector<size_t> c_status(n, ST_INVALID);
356
357 // For each row of A/C
358 size_t CSR_ip = 0, OLD_ip = 0;
359 for (size_t i = 0; i < m; i++) {
360 // First fill the c_status array w/ locations where we're allowed to
361 // generate nonzeros for this row
362 OLD_ip = Crowptr[i];
363 CSR_ip = Crowptr[i+1];
364 for (size_t k = OLD_ip; k < CSR_ip; k++) {
365 c_status[Ccolind[k]] = k;
366
367 // Reset values in the row of C
368 Cvals[k] = SC_ZERO;
369 }
370
371 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
372 LO Aik = Acolind[k];
373 const SC Aval = Avals[k];
374 if (Aval == SC_ZERO)
375 continue;
376
377 if (targetMapToOrigRow[Aik] != LO_INVALID) {
378 // Local matrix
379 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
380
381 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
382 LO Bkj = Bcolind[j];
383 LO Cij = Bcol2Ccol[Bkj];
384
385 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
386 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
387 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
388
389 Cvals[c_status[Cij]] += Aval * Bvals[j];
390 }
391
392 } else {
393 // Remote matrix
394 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
395 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
396 LO Ikj = Icolind[j];
397 LO Cij = Icol2Ccol[Ikj];
398
399 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
400 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
401 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
402
403 Cvals[c_status[Cij]] += Aval * Ivals[j];
404 }
405 }
406 }
407 }
408
409 C.fillComplete(C.getDomainMap(), C.getRangeMap());
410}
411
412/*********************************************************************************************************/
413template<class Scalar,
414 class LocalOrdinal,
415 class GlobalOrdinal,
416 class LocalOrdinalViewType>
417void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
418 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
419 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
420 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
421 const LocalOrdinalViewType & Acol2Brow,
422 const LocalOrdinalViewType & Acol2Irow,
423 const LocalOrdinalViewType & Bcol2Ccol,
424 const LocalOrdinalViewType & Icol2Ccol,
425 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
426 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
427 const std::string& label,
428 const Teuchos::RCP<Teuchos::ParameterList>& params) {
429
430#ifdef HAVE_TPETRA_MMM_TIMINGS
431 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
432 using Teuchos::TimeMonitor;
433 Teuchos::RCP<TimeMonitor> MM;
434#endif
435
436 // Node-specific code
437 using Teuchos::RCP;
438
439 // Options
440 //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
441 std::string myalg("KK");
442 if(!params.is_null()) {
443 if(params->isParameter("hip: jacobi algorithm"))
444 myalg = params->get("hip: jacobi algorithm",myalg);
445 }
446
447 if(myalg == "MSAK") {
448 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
449 }
450 else if(myalg == "KK") {
451 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
452 }
453 else {
454 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
455 }
456
457#ifdef HAVE_TPETRA_MMM_TIMINGS
458 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPESFC"))));
459#endif
460
461 // Final Fillcomplete
462 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
463 labelList->set("Timer Label",label);
464 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
465
466 // NOTE: MSAK already fillCompletes, so we have to check here
467 if(!C.isFillComplete()) {
468 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
469 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
470 }
471
472}
473
474
475
476/*********************************************************************************************************/
477template<class Scalar,
478 class LocalOrdinal,
479 class GlobalOrdinal,
480 class LocalOrdinalViewType>
481void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
482 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
483 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
484 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
485 const LocalOrdinalViewType & targetMapToOrigRow_dev,
486 const LocalOrdinalViewType & targetMapToImportRow_dev,
487 const LocalOrdinalViewType & Bcol2Ccol_dev,
488 const LocalOrdinalViewType & Icol2Ccol_dev,
489 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
490 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
491 const std::string& label,
492 const Teuchos::RCP<Teuchos::ParameterList>& params) {
493
494 // FIXME: Right now, this is a cut-and-paste of the serial kernel
495 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
496
497#ifdef HAVE_TPETRA_MMM_TIMINGS
498 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
499 using Teuchos::TimeMonitor;
500 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse HIPCore"))));
501 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
502#endif
503 using Teuchos::RCP;
504 using Teuchos::rcp;
505
506 // Lots and lots of typedefs
507 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
508 typedef typename KCRS::StaticCrsGraphType graph_t;
509 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
510 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
511 typedef typename KCRS::values_type::non_const_type scalar_view_t;
512 typedef typename scalar_view_t::memory_space scalar_memory_space;
513
514 typedef Scalar SC;
515 typedef LocalOrdinal LO;
516 typedef GlobalOrdinal GO;
517 typedef Node NO;
518 typedef Map<LO,GO,NO> map_type;
519 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
520 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
521 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
522
523 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
524 // KDDKDD UVM Ideally, this function would run on device and use
525 // KDDKDD UVM KokkosKernels instead of this host implementation.
526 auto targetMapToOrigRow =
527 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
528 targetMapToOrigRow_dev);
529 auto targetMapToImportRow =
530 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
531 targetMapToImportRow_dev);
532 auto Bcol2Ccol =
533 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
534 Bcol2Ccol_dev);
535 auto Icol2Ccol =
536 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
537 Icol2Ccol_dev);
538
539
540 // Sizes
541 RCP<const map_type> Ccolmap = C.getColMap();
542 size_t m = Aview.origMatrix->getLocalNumRows();
543 size_t n = Ccolmap->getLocalNumElements();
544
545 // Grab the Kokkos::SparseCrsMatrices & inner stuff
546 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
547 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
548 const KCRS & Cmat = C.getLocalMatrixHost();
549
550 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
551 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
552 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
553 scalar_view_t Cvals = Cmat.values;
554
555 c_lno_view_t Irowptr;
556 lno_nnz_view_t Icolind;
557 scalar_view_t Ivals;
558 if(!Bview.importMatrix.is_null()) {
559 auto lclB = Bview.importMatrix->getLocalMatrixHost();
560 Irowptr = lclB.graph.row_map;
561 Icolind = lclB.graph.entries;
562 Ivals = lclB.values;
563 }
564
565 // Jacobi-specific inner stuff
566 auto Dvals =
567 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
568
569#ifdef HAVE_TPETRA_MMM_TIMINGS
570 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse HIPCore - Compare"))));
571#endif
572
573 // The status array will contain the index into colind where this entry was last deposited.
574 // c_status[i] < CSR_ip - not in the row yet
575 // c_status[i] >= CSR_ip - this is the entry where you can find the data
576 // We start with this filled with INVALID's indicating that there are no entries yet.
577 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
578 std::vector<size_t> c_status(n, ST_INVALID);
579
580 // For each row of A/C
581 size_t CSR_ip = 0, OLD_ip = 0;
582 for (size_t i = 0; i < m; i++) {
583
584 // First fill the c_status array w/ locations where we're allowed to
585 // generate nonzeros for this row
586 OLD_ip = Crowptr[i];
587 CSR_ip = Crowptr[i+1];
588 for (size_t k = OLD_ip; k < CSR_ip; k++) {
589 c_status[Ccolind[k]] = k;
590
591 // Reset values in the row of C
592 Cvals[k] = SC_ZERO;
593 }
594
595 SC minusOmegaDval = -omega*Dvals(i,0);
596
597 // Entries of B
598 for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
599 Scalar Bval = Bvals[j];
600 if (Bval == SC_ZERO)
601 continue;
602 LO Bij = Bcolind[j];
603 LO Cij = Bcol2Ccol[Bij];
604
605 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
606 std::runtime_error, "Trying to insert a new entry into a static graph");
607
608 Cvals[c_status[Cij]] = Bvals[j];
609 }
610
611 // Entries of -omega * Dinv * A * B
612 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
613 LO Aik = Acolind[k];
614 const SC Aval = Avals[k];
615 if (Aval == SC_ZERO)
616 continue;
617
618 if (targetMapToOrigRow[Aik] != LO_INVALID) {
619 // Local matrix
620 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
621
622 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
623 LO Bkj = Bcolind[j];
624 LO Cij = Bcol2Ccol[Bkj];
625
626 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
627 std::runtime_error, "Trying to insert a new entry into a static graph");
628
629 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
630 }
631
632 } else {
633 // Remote matrix
634 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
635 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
636 LO Ikj = Icolind[j];
637 LO Cij = Icol2Ccol[Ikj];
638
639 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
640 std::runtime_error, "Trying to insert a new entry into a static graph");
641
642 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
643 }
644 }
645 }
646 }
647
648#ifdef HAVE_TPETRA_MMM_TIMINGS
649 MM2= Teuchos::null;
650 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
651#endif
652
653 C.fillComplete(C.getDomainMap(), C.getRangeMap());
654
655}
656
657/*********************************************************************************************************/
658template<class Scalar,
659 class LocalOrdinal,
660 class GlobalOrdinal,
661 class LocalOrdinalViewType>
662void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
663 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
664 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
665 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
666 const LocalOrdinalViewType & Acol2Brow,
667 const LocalOrdinalViewType & Acol2Irow,
668 const LocalOrdinalViewType & Bcol2Ccol,
669 const LocalOrdinalViewType & Icol2Ccol,
670 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
671 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
672 const std::string& label,
673 const Teuchos::RCP<Teuchos::ParameterList>& params) {
674
675#ifdef HAVE_TPETRA_MMM_TIMINGS
676 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
677 using Teuchos::TimeMonitor;
678 Teuchos::RCP<TimeMonitor> MM;
679#endif
680
681 // Check if the diagonal entries exist in debug mode
682 const bool debug = Tpetra::Details::Behavior::debug();
683 if(debug) {
684
685 auto rowMap = Aview.origMatrix->getRowMap();
686 Tpetra::Vector<Scalar> diags(rowMap);
687 Aview.origMatrix->getLocalDiagCopy(diags);
688 size_t diagLength = rowMap->getLocalNumElements();
689 Teuchos::Array<Scalar> diagonal(diagLength);
690 diags.get1dCopy(diagonal());
691
692 for(size_t i = 0; i < diagLength; ++i) {
693 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
694 std::runtime_error,
695 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
696 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
697 }
698 }
699
700 // Usings
701 using device_t = typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
703 using graph_t = typename matrix_t::StaticCrsGraphType;
704 using lno_view_t = typename graph_t::row_map_type::non_const_type;
705 using int_view_t = Kokkos::View<int *,
706 typename lno_view_t::array_layout,
707 typename lno_view_t::memory_space,
708 typename lno_view_t::memory_traits>;
709 using c_lno_view_t = typename graph_t::row_map_type::const_type;
710 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
711 using scalar_view_t = typename matrix_t::values_type::non_const_type;
712
713 // KokkosKernels handle
714 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
715 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
716 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
717 using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
718 typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
719 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
720
721 // Merge the B and Bimport matrices
722 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
723
724 // Get the properties and arrays of input matrices
725 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
726 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
727
728 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
729 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
730 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
731
732 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
733 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
734 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
735
736 // Arrays of the output matrix
737 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
738 lno_nnz_view_t entriesC;
739 scalar_view_t valuesC;
740
741 // Options
742 int team_work_size = 16;
743 std::string myalg("SPGEMM_KK_MEMORY");
744 if(!params.is_null()) {
745 if(params->isParameter("hip: algorithm"))
746 myalg = params->get("hip: algorithm",myalg);
747 if(params->isParameter("hip: team work size"))
748 team_work_size = params->get("hip: team work size",team_work_size);
749 }
750
751 // Get the algorithm mode
752 std::string nodename("HIP");
753 std::string alg = nodename + std::string(" algorithm");
754 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
755 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
756
757 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
758 const bool useIntRowptrs =
759 irph.shouldUseIntRowptrs() &&
760 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
761
762 if (useIntRowptrs) {
763 int_handle_t kh;
764 kh.create_spgemm_handle(alg_enum);
765 kh.set_team_work_size(team_work_size);
766
767 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
768
769 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
770 auto Bint = irph.getIntRowptrMatrix(Bmerged);
771
772 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
773 Aint.graph.row_map, Aint.graph.entries, false,
774 Bint.graph.row_map, Bint.graph.entries, false,
775 int_row_mapC);
776
777 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
778 if (c_nnz_size){
779 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
780 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
781 }
782
783 // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
784 // so need to have a special int-typed call for this as well.
785 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
786 Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
787 Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
788 int_row_mapC, entriesC, valuesC,
789 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
790 // transfer the integer rowptrs back to the correct rowptr type
791 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
792 kh.destroy_spgemm_handle();
793 } else {
794 handle_t kh;
795 kh.create_spgemm_handle(alg_enum);
796 kh.set_team_work_size(team_work_size);
797
798 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
799 Amat.graph.row_map, Amat.graph.entries, false,
800 Bmerged.graph.row_map, Bmerged.graph.entries, false,
801 row_mapC);
802
803 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
804 if (c_nnz_size){
805 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
806 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
807 }
808 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
809 Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
810 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
811 row_mapC, entriesC, valuesC,
812 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
813 kh.destroy_spgemm_handle();
814 }
815
816#ifdef HAVE_TPETRA_MMM_TIMINGS
817 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPSort"))));
818#endif
819
820 // Sort & set values
821 if (params.is_null() || params->get("sort entries",true))
822 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
823 C.setAllValues(row_mapC,entriesC,valuesC);
824
825#ifdef HAVE_TPETRA_MMM_TIMINGS
826 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix HIPESFC"))));
827#endif
828
829 // Final Fillcomplete
830 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
831 labelList->set("Timer Label",label);
832 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
833 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
834 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
835}
836
837 }//MMdetails
838}//Tpetra
839
840#endif//HIP
841
842#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.