Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_SYCL.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
12
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
14
15#ifdef HAVE_TPETRA_INST_SYCL
16namespace Tpetra {
17namespace MMdetails {
18
19/*********************************************************************************************************/
20// MMM KernelWrappers for Partial Specialization to SYCL
21template<class Scalar,
22 class LocalOrdinal,
23 class GlobalOrdinal,
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28 const LocalOrdinalViewType & Acol2Brow,
29 const LocalOrdinalViewType & Acol2Irow,
30 const LocalOrdinalViewType & Bcol2Ccol,
31 const LocalOrdinalViewType & Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
36
37
38
39 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
40 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
41 const LocalOrdinalViewType & Acol2Brow,
42 const LocalOrdinalViewType & Acol2Irow,
43 const LocalOrdinalViewType & Bcol2Ccol,
44 const LocalOrdinalViewType & Icol2Ccol,
45 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
46 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
47 const std::string& label = std::string(),
48 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
49
50};
51
52// Jacobi KernelWrappers for Partial Specialization to SYCL
53template<class Scalar,
54 class LocalOrdinal,
55 class GlobalOrdinal, class LocalOrdinalViewType>
56struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
57 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
59 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
66 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69
70 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
72 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
79 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82
83 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
85 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
86 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
87 const LocalOrdinalViewType & Acol2Brow,
88 const LocalOrdinalViewType & Acol2Irow,
89 const LocalOrdinalViewType & Bcol2Ccol,
90 const LocalOrdinalViewType & Icol2Ccol,
91 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
92 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
93 const std::string& label = std::string(),
94 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
95};
96
97
98/*********************************************************************************************************/
99// AB NewMatrix Kernel wrappers (KokkosKernels/SYCL Version)
100template<class Scalar,
101 class LocalOrdinal,
102 class GlobalOrdinal,
103 class LocalOrdinalViewType>
104void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
106 const LocalOrdinalViewType & Acol2Brow,
107 const LocalOrdinalViewType & Acol2Irow,
108 const LocalOrdinalViewType & Bcol2Ccol,
109 const LocalOrdinalViewType & Icol2Ccol,
110 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
111 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
112 const std::string& label,
113 const Teuchos::RCP<Teuchos::ParameterList>& params) {
114
115
116#ifdef HAVE_TPETRA_MMM_TIMINGS
117 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
118 using Teuchos::TimeMonitor;
119 Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLWrapper")))));
120#endif
121 // Node-specific code
122 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
123 std::string nodename("SYCL");
124
125 // Lots and lots of typedefs
126 using Teuchos::RCP;
128 typedef typename KCRS::device_type device_t;
129 typedef typename KCRS::StaticCrsGraphType graph_t;
130 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
131 using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
132 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
133 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
134 typedef typename KCRS::values_type::non_const_type scalar_view_t;
135 //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
136
137 // Options
138 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
139 std::string myalg("SPGEMM_KK_MEMORY");
140 if(!params.is_null()) {
141 if(params->isParameter("sycl: algorithm"))
142 myalg = params->get("sycl: algorithm",myalg);
143 if(params->isParameter("sycl: team work size"))
144 team_work_size = params->get("sycl: team work size",team_work_size);
145 }
146
147 // KokkosKernelsHandle
148 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
149 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
150 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
151 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
152 typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
153 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
154
155 // Grab the Kokkos::SparseCrsMatrices
156 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
157 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
158
159 c_lno_view_t Arowptr = Amat.graph.row_map,
160 Browptr = Bmat.graph.row_map;
161 const lno_nnz_view_t Acolind = Amat.graph.entries,
162 Bcolind = Bmat.graph.entries;
163 const scalar_view_t Avals = Amat.values,
164 Bvals = Bmat.values;
165
166 // Get the algorithm mode
167 std::string alg = nodename+std::string(" algorithm");
168 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
169 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
170 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
171
172 // Merge the B and Bimport matrices
173 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
174
175#ifdef HAVE_TPETRA_MMM_TIMINGS
176 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLCore"))));
177#endif
178
179 // Do the multiply on whatever we've got
180 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
181 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
182 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
183
184 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lno_row"), AnumRows + 1);
185 lno_nnz_view_t entriesC;
186 scalar_view_t valuesC;
187
188 // static_assert(std::is_void_v<typename KCRS::row_map_type::non_const_value_type>, "");
189 // static_assert(std::is_void_v<typename decltype(Bmerged)::row_map_type::non_const_value_type>, "");
190 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
191 const bool useIntRowptrs =
192 irph.shouldUseIntRowptrs() &&
193 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
194
195 if (useIntRowptrs) {
196 IntKernelHandle kh;
197 kh.create_spgemm_handle(alg_enum);
198 kh.set_team_work_size(team_work_size);
199
200 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_int_row"), AnumRows + 1);
201
202 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
203 auto Bint = irph.getIntRowptrMatrix(Bmerged);
204 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,false,Bint.graph.row_map,Bint.graph.entries,false, int_row_mapC);
205
206 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
207 if (c_nnz_size){
208 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
209 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
210 }
211 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,false,Bint.graph.row_map,Bint.graph.entries,Bint.values,false,int_row_mapC,entriesC,valuesC);
212 // transfer the integer rowptrs back to the correct rowptr type
213 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
214 kh.destroy_spgemm_handle();
215
216 } else {
217 KernelHandle kh;
218 kh.create_spgemm_handle(alg_enum);
219 kh.set_team_work_size(team_work_size);
220
221 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
222
223 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
224 if (c_nnz_size){
225 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
226 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
227 }
228
229 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
230 kh.destroy_spgemm_handle();
231 }
232
233#ifdef HAVE_TPETRA_MMM_TIMINGS
234 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLSort"))));
235#endif
236
237 // Sort & set values
238 if (params.is_null() || params->get("sort entries",true))
239 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
240 C.setAllValues(row_mapC,entriesC,valuesC);
241
242#ifdef HAVE_TPETRA_MMM_TIMINGS
243 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLESFC"))));
244#endif
245
246 // Final Fillcomplete
247 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
248 labelList->set("Timer Label",label);
249 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
250 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
251 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
252}
253
254
255/*********************************************************************************************************/
256template<class Scalar,
257 class LocalOrdinal,
258 class GlobalOrdinal,
259 class LocalOrdinalViewType>
260void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
261 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
262 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
263 const LocalOrdinalViewType & targetMapToOrigRow_dev,
264 const LocalOrdinalViewType & targetMapToImportRow_dev,
265 const LocalOrdinalViewType & Bcol2Ccol_dev,
266 const LocalOrdinalViewType & Icol2Ccol_dev,
267 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
268 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
269 const std::string& label,
270 const Teuchos::RCP<Teuchos::ParameterList>& params) {
271
272 // FIXME: Right now, this is a cut-and-paste of the serial kernel
273 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
274
275#ifdef HAVE_TPETRA_MMM_TIMINGS
276 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
277 using Teuchos::TimeMonitor;
278 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
279 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
280#endif
281 using Teuchos::RCP;
282 using Teuchos::rcp;
283
284
285 // Lots and lots of typedefs
286 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
287 typedef typename KCRS::StaticCrsGraphType graph_t;
288 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
289 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
290 typedef typename KCRS::values_type::non_const_type scalar_view_t;
291
292 typedef Scalar SC;
293 typedef LocalOrdinal LO;
294 typedef GlobalOrdinal GO;
295 typedef Node NO;
296 typedef Map<LO,GO,NO> map_type;
297 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
298 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
299 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
300
301 // Since this is being run on SYCL, we need to fence because the below code will use UVM
302 // typename graph_t::execution_space().fence();
303
304 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
305 // KDDKDD UVM Ideally, this function would run on device and use
306 // KDDKDD UVM KokkosKernels instead of this host implementation.
307 auto targetMapToOrigRow =
308 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
309 targetMapToOrigRow_dev);
310 auto targetMapToImportRow =
311 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
312 targetMapToImportRow_dev);
313 auto Bcol2Ccol =
314 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
315 Bcol2Ccol_dev);
316 auto Icol2Ccol =
317 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
318 Icol2Ccol_dev);
319
320 // Sizes
321 RCP<const map_type> Ccolmap = C.getColMap();
322 size_t m = Aview.origMatrix->getLocalNumRows();
323 size_t n = Ccolmap->getLocalNumElements();
324
325 // Grab the Kokkos::SparseCrsMatrices & inner stuff
326 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
327 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
328 const KCRS & Cmat = C.getLocalMatrixHost();
329
330 c_lno_view_t Arowptr = Amat.graph.row_map,
331 Browptr = Bmat.graph.row_map,
332 Crowptr = Cmat.graph.row_map;
333 const lno_nnz_view_t Acolind = Amat.graph.entries,
334 Bcolind = Bmat.graph.entries,
335 Ccolind = Cmat.graph.entries;
336 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
337 scalar_view_t Cvals = Cmat.values;
338
339 c_lno_view_t Irowptr;
340 lno_nnz_view_t Icolind;
341 scalar_view_t Ivals;
342 if(!Bview.importMatrix.is_null()) {
343 auto lclB = Bview.importMatrix->getLocalMatrixHost();
344 Irowptr = lclB.graph.row_map;
345 Icolind = lclB.graph.entries;
346 Ivals = lclB.values;
347 }
348
349#ifdef HAVE_TPETRA_MMM_TIMINGS
350 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
351#endif
352
353 // Classic csr assembly (low memory edition)
354 // mfh 27 Sep 2016: The c_status array is an implementation detail
355 // of the local sparse matrix-matrix multiply routine.
356
357 // The status array will contain the index into colind where this entry was last deposited.
358 // c_status[i] < CSR_ip - not in the row yet
359 // c_status[i] >= CSR_ip - this is the entry where you can find the data
360 // We start with this filled with INVALID's indicating that there are no entries yet.
361 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
362 std::vector<size_t> c_status(n, ST_INVALID);
363
364 // For each row of A/C
365 size_t CSR_ip = 0, OLD_ip = 0;
366 for (size_t i = 0; i < m; i++) {
367 // First fill the c_status array w/ locations where we're allowed to
368 // generate nonzeros for this row
369 OLD_ip = Crowptr[i];
370 CSR_ip = Crowptr[i+1];
371 for (size_t k = OLD_ip; k < CSR_ip; k++) {
372 c_status[Ccolind[k]] = k;
373
374 // Reset values in the row of C
375 Cvals[k] = SC_ZERO;
376 }
377
378 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
379 LO Aik = Acolind[k];
380 const SC Aval = Avals[k];
381 if (Aval == SC_ZERO)
382 continue;
383
384 if (targetMapToOrigRow[Aik] != LO_INVALID) {
385 // Local matrix
386 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
387
388 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
389 LO Bkj = Bcolind[j];
390 LO Cij = Bcol2Ccol[Bkj];
391
392 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
393 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
394 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
395
396 Cvals[c_status[Cij]] += Aval * Bvals[j];
397 }
398
399 } else {
400 // Remote matrix
401 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
402 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
403 LO Ikj = Icolind[j];
404 LO Cij = Icol2Ccol[Ikj];
405
406 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
407 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
408 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
409
410 Cvals[c_status[Cij]] += Aval * Ivals[j];
411 }
412 }
413 }
414 }
415
416 C.fillComplete(C.getDomainMap(), C.getRangeMap());
417}
418
419/*********************************************************************************************************/
420template<class Scalar,
421 class LocalOrdinal,
422 class GlobalOrdinal,
423 class LocalOrdinalViewType>
424void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
425 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
426 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
427 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
428 const LocalOrdinalViewType & Acol2Brow,
429 const LocalOrdinalViewType & Acol2Irow,
430 const LocalOrdinalViewType & Bcol2Ccol,
431 const LocalOrdinalViewType & Icol2Ccol,
432 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
433 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
434 const std::string& label,
435 const Teuchos::RCP<Teuchos::ParameterList>& params) {
436
437#ifdef HAVE_TPETRA_MMM_TIMINGS
438 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
439 using Teuchos::TimeMonitor;
440 Teuchos::RCP<TimeMonitor> MM;
441#endif
442
443 // Node-specific code
444 using Teuchos::RCP;
445
446 // Options
447 //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
448 std::string myalg("KK");
449 if(!params.is_null()) {
450 if(params->isParameter("sycl: jacobi algorithm"))
451 myalg = params->get("sycl: jacobi algorithm",myalg);
452 }
453
454 if(myalg == "MSAK") {
455 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
456 }
457 else if(myalg == "KK") {
458 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
459 }
460 else {
461 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
462 }
463
464#ifdef HAVE_TPETRA_MMM_TIMINGS
465 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
466#endif
467
468 // Final Fillcomplete
469 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
470 labelList->set("Timer Label",label);
471 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
472
473 // NOTE: MSAK already fillCompletes, so we have to check here
474 if(!C.isFillComplete()) {
475 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
476 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
477 }
478
479}
480
481
482
483/*********************************************************************************************************/
484template<class Scalar,
485 class LocalOrdinal,
486 class GlobalOrdinal,
487 class LocalOrdinalViewType>
488void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
489 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
490 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
491 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
492 const LocalOrdinalViewType & targetMapToOrigRow_dev,
493 const LocalOrdinalViewType & targetMapToImportRow_dev,
494 const LocalOrdinalViewType & Bcol2Ccol_dev,
495 const LocalOrdinalViewType & Icol2Ccol_dev,
496 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
497 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
498 const std::string& label,
499 const Teuchos::RCP<Teuchos::ParameterList>& params) {
500
501 // FIXME: Right now, this is a cut-and-paste of the serial kernel
502 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
503
504#ifdef HAVE_TPETRA_MMM_TIMINGS
505 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
506 using Teuchos::TimeMonitor;
507 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore"))));
508 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
509#endif
510 using Teuchos::RCP;
511 using Teuchos::rcp;
512
513 // Lots and lots of typedefs
514 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
515 typedef typename KCRS::StaticCrsGraphType graph_t;
516 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
517 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
518 typedef typename KCRS::values_type::non_const_type scalar_view_t;
519 typedef typename scalar_view_t::memory_space scalar_memory_space;
520
521 typedef Scalar SC;
522 typedef LocalOrdinal LO;
523 typedef GlobalOrdinal GO;
524 typedef Node NO;
525 typedef Map<LO,GO,NO> map_type;
526 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
527 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
529
530 // Since this is being run on SYCL, we need to fence because the below host code will use UVM
531 // KDDKDD typename graph_t::execution_space().fence();
532
533 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
534 // KDDKDD UVM Ideally, this function would run on device and use
535 // KDDKDD UVM KokkosKernels instead of this host implementation.
536 auto targetMapToOrigRow =
537 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
538 targetMapToOrigRow_dev);
539 auto targetMapToImportRow =
540 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541 targetMapToImportRow_dev);
542 auto Bcol2Ccol =
543 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
544 Bcol2Ccol_dev);
545 auto Icol2Ccol =
546 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
547 Icol2Ccol_dev);
548
549
550 // Sizes
551 RCP<const map_type> Ccolmap = C.getColMap();
552 size_t m = Aview.origMatrix->getLocalNumRows();
553 size_t n = Ccolmap->getLocalNumElements();
554
555 // Grab the Kokkos::SparseCrsMatrices & inner stuff
556 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
557 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
558 const KCRS & Cmat = C.getLocalMatrixHost();
559
560 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
561 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
562 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
563 scalar_view_t Cvals = Cmat.values;
564
565 c_lno_view_t Irowptr;
566 lno_nnz_view_t Icolind;
567 scalar_view_t Ivals;
568 if(!Bview.importMatrix.is_null()) {
569 auto lclB = Bview.importMatrix->getLocalMatrixHost();
570 Irowptr = lclB.graph.row_map;
571 Icolind = lclB.graph.entries;
572 Ivals = lclB.values;
573 }
574
575 // Jacobi-specific inner stuff
576 auto Dvals =
577 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
578
579#ifdef HAVE_TPETRA_MMM_TIMINGS
580 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore - Compare"))));
581#endif
582
583 // The status array will contain the index into colind where this entry was last deposited.
584 // c_status[i] < CSR_ip - not in the row yet
585 // c_status[i] >= CSR_ip - this is the entry where you can find the data
586 // We start with this filled with INVALID's indicating that there are no entries yet.
587 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
588 std::vector<size_t> c_status(n, ST_INVALID);
589
590 // For each row of A/C
591 size_t CSR_ip = 0, OLD_ip = 0;
592 for (size_t i = 0; i < m; i++) {
593
594 // First fill the c_status array w/ locations where we're allowed to
595 // generate nonzeros for this row
596 OLD_ip = Crowptr[i];
597 CSR_ip = Crowptr[i+1];
598 for (size_t k = OLD_ip; k < CSR_ip; k++) {
599 c_status[Ccolind[k]] = k;
600
601 // Reset values in the row of C
602 Cvals[k] = SC_ZERO;
603 }
604
605 SC minusOmegaDval = -omega*Dvals(i,0);
606
607 // Entries of B
608 for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
609 Scalar Bval = Bvals[j];
610 if (Bval == SC_ZERO)
611 continue;
612 LO Bij = Bcolind[j];
613 LO Cij = Bcol2Ccol[Bij];
614
615 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
616 std::runtime_error, "Trying to insert a new entry into a static graph");
617
618 Cvals[c_status[Cij]] = Bvals[j];
619 }
620
621 // Entries of -omega * Dinv * A * B
622 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
623 LO Aik = Acolind[k];
624 const SC Aval = Avals[k];
625 if (Aval == SC_ZERO)
626 continue;
627
628 if (targetMapToOrigRow[Aik] != LO_INVALID) {
629 // Local matrix
630 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
631
632 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
633 LO Bkj = Bcolind[j];
634 LO Cij = Bcol2Ccol[Bkj];
635
636 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
637 std::runtime_error, "Trying to insert a new entry into a static graph");
638
639 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
640 }
641
642 } else {
643 // Remote matrix
644 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
645 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
646 LO Ikj = Icolind[j];
647 LO Cij = Icol2Ccol[Ikj];
648
649 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
650 std::runtime_error, "Trying to insert a new entry into a static graph");
651
652 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
653 }
654 }
655 }
656 }
657
658#ifdef HAVE_TPETRA_MMM_TIMINGS
659 MM2= Teuchos::null;
660 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
661#endif
662
663 C.fillComplete(C.getDomainMap(), C.getRangeMap());
664
665}
666
667/*********************************************************************************************************/
668template<class Scalar,
669 class LocalOrdinal,
670 class GlobalOrdinal,
671 class LocalOrdinalViewType>
672void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
673 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
674 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
675 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
676 const LocalOrdinalViewType & Acol2Brow,
677 const LocalOrdinalViewType & Acol2Irow,
678 const LocalOrdinalViewType & Bcol2Ccol,
679 const LocalOrdinalViewType & Icol2Ccol,
680 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
681 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
682 const std::string& label,
683 const Teuchos::RCP<Teuchos::ParameterList>& params) {
684
685#ifdef HAVE_TPETRA_MMM_TIMINGS
686 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
687 using Teuchos::TimeMonitor;
688 Teuchos::RCP<TimeMonitor> MM;
689#endif
690
691 // Check if the diagonal entries exist in debug mode
692 const bool debug = Tpetra::Details::Behavior::debug();
693 if(debug) {
694
695 auto rowMap = Aview.origMatrix->getRowMap();
696 Tpetra::Vector<Scalar> diags(rowMap);
697 Aview.origMatrix->getLocalDiagCopy(diags);
698 size_t diagLength = rowMap->getLocalNumElements();
699 Teuchos::Array<Scalar> diagonal(diagLength);
700 diags.get1dCopy(diagonal());
701
702 for(size_t i = 0; i < diagLength; ++i) {
703 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
704 std::runtime_error,
705 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
706 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
707 }
708 }
709
710 // Usings
711 using device_t = typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
713 using graph_t = typename matrix_t::StaticCrsGraphType;
714 using lno_view_t = typename graph_t::row_map_type::non_const_type;
715 using int_view_t = Kokkos::View<int *,
716 typename lno_view_t::array_layout,
717 typename lno_view_t::memory_space,
718 typename lno_view_t::memory_traits>;
719 using c_lno_view_t = typename graph_t::row_map_type::const_type;
720 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
721 using scalar_view_t = typename matrix_t::values_type::non_const_type;
722
723 // KokkosKernels handle
724 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
725 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
726 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
727 using int_handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
728 typename int_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
729 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
730
731 // Merge the B and Bimport matrices
732 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
733
734 // Get the properties and arrays of input matrices
735 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
736 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
737
738 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
739 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
740 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
741
742 // Arrays of the output matrix
743 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("row_mapC"), AnumRows + 1);
744 lno_nnz_view_t entriesC;
745 scalar_view_t valuesC;
746
747 // Options
748 int team_work_size = 16;
749 std::string myalg("SPGEMM_KK_MEMORY");
750 if(!params.is_null()) {
751 if(params->isParameter("sycl: algorithm"))
752 myalg = params->get("sycl: algorithm",myalg);
753 if(params->isParameter("sycl: team work size"))
754 team_work_size = params->get("sycl: team work size",team_work_size);
755 }
756
757 // Get the algorithm mode
758 std::string nodename("SYCL");
759 std::string alg = nodename + std::string(" algorithm");
760 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
761 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
762
763 // decide whether to use integer-typed row pointers for this spgemm
764 Tpetra::Details::IntRowPtrHelper<decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
765 const bool useIntRowptrs =
766 irph.shouldUseIntRowptrs() &&
767 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
768
769 if (useIntRowptrs) {
770 int_handle_t kh;
771 kh.create_spgemm_handle(alg_enum);
772 kh.set_team_work_size(team_work_size);
773
774 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing("int_row_mapC"), AnumRows + 1);
775
776 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
777 auto Bint = irph.getIntRowptrMatrix(Bmerged);
778
779 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
780 Aint.graph.row_map, Aint.graph.entries, false,
781 Bint.graph.row_map, Bint.graph.entries, false,
782 int_row_mapC);
783
784 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
785 if (c_nnz_size){
786 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
787 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
788 }
789
790 // even though there is no TPL for this, we have to use the same handle that was used in the symbolic phase,
791 // so need to have a special int-typed call for this as well.
792 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
793 Aint.graph.row_map, Aint.graph.entries, Amat.values, false,
794 Bint.graph.row_map, Bint.graph.entries, Bint.values, false,
795 int_row_mapC, entriesC, valuesC,
796 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
797 // transfer the integer rowptrs back to the correct rowptr type
798 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(int i){ row_mapC(i) = int_row_mapC(i);});
799 kh.destroy_spgemm_handle();
800 } else {
801 handle_t kh;
802 kh.create_spgemm_handle(alg_enum);
803 kh.set_team_work_size(team_work_size);
804
805 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
806 Amat.graph.row_map, Amat.graph.entries, false,
807 Bmerged.graph.row_map, Bmerged.graph.entries, false,
808 row_mapC);
809
810 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
811 if (c_nnz_size){
812 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
813 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
814 }
815 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
816 Amat.graph.row_map, Amat.graph.entries, Amat.values, false,
817 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values, false,
818 row_mapC, entriesC, valuesC,
819 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
820 kh.destroy_spgemm_handle();
821 }
822
823#ifdef HAVE_TPETRA_MMM_TIMINGS
824 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLSort"))));
825#endif
826
827 // Sort & set values
828 if (params.is_null() || params->get("sort entries",true))
829 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
830 C.setAllValues(row_mapC,entriesC,valuesC);
831
832#ifdef HAVE_TPETRA_MMM_TIMINGS
833 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
834#endif
835
836 // Final Fillcomplete
837 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
838 labelList->set("Timer Label",label);
839 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
840 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
841 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
842}
843
844 }//MMdetails
845}//Tpetra
846
847#endif//SYCL
848
849#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.