10#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
11#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
15#ifdef HAVE_TPETRA_INST_SYCL
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
28 const LocalOrdinalViewType & Acol2Brow,
29 const LocalOrdinalViewType & Acol2Irow,
30 const LocalOrdinalViewType & Bcol2Ccol,
31 const LocalOrdinalViewType & Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
33 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
39 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
40 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
41 const LocalOrdinalViewType & Acol2Brow,
42 const LocalOrdinalViewType & Acol2Irow,
43 const LocalOrdinalViewType & Bcol2Ccol,
44 const LocalOrdinalViewType & Icol2Ccol,
45 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
46 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
47 const std::string& label = std::string(),
48 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
55 class GlobalOrdinal,
class LocalOrdinalViewType>
56struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
57 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
59 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
66 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
70 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
72 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
79 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
83 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
85 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
86 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
87 const LocalOrdinalViewType & Acol2Brow,
88 const LocalOrdinalViewType & Acol2Irow,
89 const LocalOrdinalViewType & Bcol2Ccol,
90 const LocalOrdinalViewType & Icol2Ccol,
91 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
92 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
93 const std::string& label = std::string(),
94 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
100template<
class Scalar,
103 class LocalOrdinalViewType>
104void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
106 const LocalOrdinalViewType & Acol2Brow,
107 const LocalOrdinalViewType & Acol2Irow,
108 const LocalOrdinalViewType & Bcol2Ccol,
109 const LocalOrdinalViewType & Icol2Ccol,
110 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
111 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
112 const std::string& label,
113 const Teuchos::RCP<Teuchos::ParameterList>& params) {
116#ifdef HAVE_TPETRA_MMM_TIMINGS
117 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
118 using Teuchos::TimeMonitor;
119 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLWrapper")))));
122 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
123 std::string nodename(
"SYCL");
128 typedef typename KCRS::device_type device_t;
129 typedef typename KCRS::StaticCrsGraphType graph_t;
130 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
131 using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
132 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
133 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
134 typedef typename KCRS::values_type::non_const_type scalar_view_t;
138 int team_work_size = 16;
139 std::string myalg(
"SPGEMM_KK_MEMORY");
140 if(!params.is_null()) {
141 if(params->isParameter(
"sycl: algorithm"))
142 myalg = params->get(
"sycl: algorithm",myalg);
143 if(params->isParameter(
"sycl: team work size"))
144 team_work_size = params->get(
"sycl: team work size",team_work_size);
148 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
149 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
150 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
151 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
152 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
153 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
156 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
157 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
159 c_lno_view_t Arowptr = Amat.graph.row_map,
160 Browptr = Bmat.graph.row_map;
161 const lno_nnz_view_t Acolind = Amat.graph.entries,
162 Bcolind = Bmat.graph.entries;
163 const scalar_view_t Avals = Amat.values,
167 std::string alg = nodename+std::string(
" algorithm");
169 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
170 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
173 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
175#ifdef HAVE_TPETRA_MMM_TIMINGS
176 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLCore"))));
180 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
181 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
182 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
184 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lno_row"), AnumRows + 1);
185 lno_nnz_view_t entriesC;
186 scalar_view_t valuesC;
190 Tpetra::Details::IntRowPtrHelper<
decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
191 const bool useIntRowptrs =
192 irph.shouldUseIntRowptrs() &&
193 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
197 kh.create_spgemm_handle(alg_enum);
198 kh.set_team_work_size(team_work_size);
200 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_int_row"), AnumRows + 1);
202 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
203 auto Bint = irph.getIntRowptrMatrix(Bmerged);
204 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,
false,Bint.graph.row_map,Bint.graph.entries,
false, int_row_mapC);
206 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
208 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
209 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
211 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,
false,Bint.graph.row_map,Bint.graph.entries,Bint.values,
false,int_row_mapC,entriesC,valuesC);
213 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
214 kh.destroy_spgemm_handle();
218 kh.create_spgemm_handle(alg_enum);
219 kh.set_team_work_size(team_work_size);
221 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
223 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
225 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
226 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
229 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
230 kh.destroy_spgemm_handle();
233#ifdef HAVE_TPETRA_MMM_TIMINGS
234 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLSort"))));
238 if (params.is_null() || params->get(
"sort entries",
true))
239 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
240 C.setAllValues(row_mapC,entriesC,valuesC);
242#ifdef HAVE_TPETRA_MMM_TIMINGS
243 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SYCLESFC"))));
247 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
248 labelList->set(
"Timer Label",label);
249 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
250 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
251 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
256template<
class Scalar,
259 class LocalOrdinalViewType>
260void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
261 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
262 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
263 const LocalOrdinalViewType & targetMapToOrigRow_dev,
264 const LocalOrdinalViewType & targetMapToImportRow_dev,
265 const LocalOrdinalViewType & Bcol2Ccol_dev,
266 const LocalOrdinalViewType & Icol2Ccol_dev,
267 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
268 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
269 const std::string& label,
270 const Teuchos::RCP<Teuchos::ParameterList>& params) {
273 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
275#ifdef HAVE_TPETRA_MMM_TIMINGS
276 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
277 using Teuchos::TimeMonitor;
278 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
279 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
286 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
287 typedef typename KCRS::StaticCrsGraphType graph_t;
288 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
289 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
290 typedef typename KCRS::values_type::non_const_type scalar_view_t;
293 typedef LocalOrdinal LO;
294 typedef GlobalOrdinal GO;
296 typedef Map<LO,GO,NO> map_type;
297 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
298 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
299 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
307 auto targetMapToOrigRow =
308 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
309 targetMapToOrigRow_dev);
310 auto targetMapToImportRow =
311 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
312 targetMapToImportRow_dev);
314 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
321 RCP<const map_type> Ccolmap = C.getColMap();
322 size_t m = Aview.origMatrix->getLocalNumRows();
323 size_t n = Ccolmap->getLocalNumElements();
326 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
327 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
328 const KCRS & Cmat = C.getLocalMatrixHost();
330 c_lno_view_t Arowptr = Amat.graph.row_map,
331 Browptr = Bmat.graph.row_map,
332 Crowptr = Cmat.graph.row_map;
333 const lno_nnz_view_t Acolind = Amat.graph.entries,
334 Bcolind = Bmat.graph.entries,
335 Ccolind = Cmat.graph.entries;
336 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
337 scalar_view_t Cvals = Cmat.values;
339 c_lno_view_t Irowptr;
340 lno_nnz_view_t Icolind;
342 if(!Bview.importMatrix.is_null()) {
343 auto lclB = Bview.importMatrix->getLocalMatrixHost();
344 Irowptr = lclB.graph.row_map;
345 Icolind = lclB.graph.entries;
349#ifdef HAVE_TPETRA_MMM_TIMINGS
350 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
362 std::vector<size_t> c_status(n, ST_INVALID);
365 size_t CSR_ip = 0, OLD_ip = 0;
366 for (
size_t i = 0; i < m; i++) {
370 CSR_ip = Crowptr[i+1];
371 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
372 c_status[Ccolind[k]] = k;
378 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
380 const SC Aval = Avals[k];
384 if (targetMapToOrigRow[Aik] != LO_INVALID) {
386 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
388 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
390 LO Cij = Bcol2Ccol[Bkj];
392 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
393 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
394 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
396 Cvals[c_status[Cij]] += Aval * Bvals[j];
401 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
402 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
404 LO Cij = Icol2Ccol[Ikj];
406 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
407 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
408 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
410 Cvals[c_status[Cij]] += Aval * Ivals[j];
416 C.fillComplete(C.getDomainMap(), C.getRangeMap());
420template<
class Scalar,
423 class LocalOrdinalViewType>
424void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
425 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
426 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
427 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
428 const LocalOrdinalViewType & Acol2Brow,
429 const LocalOrdinalViewType & Acol2Irow,
430 const LocalOrdinalViewType & Bcol2Ccol,
431 const LocalOrdinalViewType & Icol2Ccol,
432 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
433 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
434 const std::string& label,
435 const Teuchos::RCP<Teuchos::ParameterList>& params) {
437#ifdef HAVE_TPETRA_MMM_TIMINGS
438 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
439 using Teuchos::TimeMonitor;
440 Teuchos::RCP<TimeMonitor> MM;
448 std::string myalg(
"KK");
449 if(!params.is_null()) {
450 if(params->isParameter(
"sycl: jacobi algorithm"))
451 myalg = params->get(
"sycl: jacobi algorithm",myalg);
454 if(myalg ==
"MSAK") {
455 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
457 else if(myalg ==
"KK") {
458 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
461 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
464#ifdef HAVE_TPETRA_MMM_TIMINGS
465 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
469 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
470 labelList->set(
"Timer Label",label);
471 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
474 if(!C.isFillComplete()) {
475 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
476 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
484template<
class Scalar,
487 class LocalOrdinalViewType>
488void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
489 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
490 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
491 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
492 const LocalOrdinalViewType & targetMapToOrigRow_dev,
493 const LocalOrdinalViewType & targetMapToImportRow_dev,
494 const LocalOrdinalViewType & Bcol2Ccol_dev,
495 const LocalOrdinalViewType & Icol2Ccol_dev,
496 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
497 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
498 const std::string& label,
499 const Teuchos::RCP<Teuchos::ParameterList>& params) {
502 typedef Tpetra::KokkosCompat::KokkosSYCLWrapperNode Node;
504#ifdef HAVE_TPETRA_MMM_TIMINGS
505 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
506 using Teuchos::TimeMonitor;
507 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore"))));
508 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
514 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
515 typedef typename KCRS::StaticCrsGraphType graph_t;
516 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
517 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
518 typedef typename KCRS::values_type::non_const_type scalar_view_t;
519 typedef typename scalar_view_t::memory_space scalar_memory_space;
522 typedef LocalOrdinal LO;
523 typedef GlobalOrdinal GO;
525 typedef Map<LO,GO,NO> map_type;
526 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
527 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
528 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
536 auto targetMapToOrigRow =
537 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
538 targetMapToOrigRow_dev);
539 auto targetMapToImportRow =
540 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541 targetMapToImportRow_dev);
543 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
546 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
551 RCP<const map_type> Ccolmap = C.getColMap();
552 size_t m = Aview.origMatrix->getLocalNumRows();
553 size_t n = Ccolmap->getLocalNumElements();
556 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
557 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
558 const KCRS & Cmat = C.getLocalMatrixHost();
560 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
561 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
562 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
563 scalar_view_t Cvals = Cmat.values;
565 c_lno_view_t Irowptr;
566 lno_nnz_view_t Icolind;
568 if(!Bview.importMatrix.is_null()) {
569 auto lclB = Bview.importMatrix->getLocalMatrixHost();
570 Irowptr = lclB.graph.row_map;
571 Icolind = lclB.graph.entries;
577 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
579#ifdef HAVE_TPETRA_MMM_TIMINGS
580 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SYCLCore - Compare"))));
588 std::vector<size_t> c_status(n, ST_INVALID);
591 size_t CSR_ip = 0, OLD_ip = 0;
592 for (
size_t i = 0; i < m; i++) {
597 CSR_ip = Crowptr[i+1];
598 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
599 c_status[Ccolind[k]] = k;
605 SC minusOmegaDval = -omega*Dvals(i,0);
608 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
609 Scalar Bval = Bvals[j];
613 LO Cij = Bcol2Ccol[Bij];
615 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
616 std::runtime_error,
"Trying to insert a new entry into a static graph");
618 Cvals[c_status[Cij]] = Bvals[j];
622 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
624 const SC Aval = Avals[k];
628 if (targetMapToOrigRow[Aik] != LO_INVALID) {
630 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
632 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
634 LO Cij = Bcol2Ccol[Bkj];
636 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
637 std::runtime_error,
"Trying to insert a new entry into a static graph");
639 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
644 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
645 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
647 LO Cij = Icol2Ccol[Ikj];
649 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
650 std::runtime_error,
"Trying to insert a new entry into a static graph");
652 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
658#ifdef HAVE_TPETRA_MMM_TIMINGS
660 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
663 C.fillComplete(C.getDomainMap(), C.getRangeMap());
668template<
class Scalar,
671 class LocalOrdinalViewType>
672void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
673 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> & Dinv,
674 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Aview,
675 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& Bview,
676 const LocalOrdinalViewType & Acol2Brow,
677 const LocalOrdinalViewType & Acol2Irow,
678 const LocalOrdinalViewType & Bcol2Ccol,
679 const LocalOrdinalViewType & Icol2Ccol,
680 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSYCLWrapperNode>& C,
681 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > Cimport,
682 const std::string& label,
683 const Teuchos::RCP<Teuchos::ParameterList>& params) {
685#ifdef HAVE_TPETRA_MMM_TIMINGS
686 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
687 using Teuchos::TimeMonitor;
688 Teuchos::RCP<TimeMonitor> MM;
695 auto rowMap = Aview.origMatrix->getRowMap();
696 Tpetra::Vector<Scalar> diags(rowMap);
697 Aview.origMatrix->getLocalDiagCopy(diags);
698 size_t diagLength = rowMap->getLocalNumElements();
699 Teuchos::Array<Scalar> diagonal(diagLength);
700 diags.get1dCopy(diagonal());
702 for(
size_t i = 0; i < diagLength; ++i) {
703 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
705 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
706 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
711 using device_t =
typename Tpetra::KokkosCompat::KokkosSYCLWrapperNode::device_type;
713 using graph_t =
typename matrix_t::StaticCrsGraphType;
714 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
715 using int_view_t = Kokkos::View<
int *,
716 typename lno_view_t::array_layout,
717 typename lno_view_t::memory_space,
718 typename lno_view_t::memory_traits>;
719 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
720 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
721 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
724 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
725 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
726 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
727 using int_handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
728 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
729 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
732 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
735 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
736 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
738 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
739 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
740 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
743 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"row_mapC"), AnumRows + 1);
744 lno_nnz_view_t entriesC;
745 scalar_view_t valuesC;
748 int team_work_size = 16;
749 std::string myalg(
"SPGEMM_KK_MEMORY");
750 if(!params.is_null()) {
751 if(params->isParameter(
"sycl: algorithm"))
752 myalg = params->get(
"sycl: algorithm",myalg);
753 if(params->isParameter(
"sycl: team work size"))
754 team_work_size = params->get(
"sycl: team work size",team_work_size);
758 std::string nodename(
"SYCL");
759 std::string alg = nodename + std::string(
" algorithm");
760 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
761 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
764 Tpetra::Details::IntRowPtrHelper<
decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
765 const bool useIntRowptrs =
766 irph.shouldUseIntRowptrs() &&
767 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
771 kh.create_spgemm_handle(alg_enum);
772 kh.set_team_work_size(team_work_size);
774 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"int_row_mapC"), AnumRows + 1);
776 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
777 auto Bint = irph.getIntRowptrMatrix(Bmerged);
779 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
780 Aint.graph.row_map, Aint.graph.entries,
false,
781 Bint.graph.row_map, Bint.graph.entries,
false,
784 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
786 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
787 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
792 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
793 Aint.graph.row_map, Aint.graph.entries, Amat.values,
false,
794 Bint.graph.row_map, Bint.graph.entries, Bint.values,
false,
795 int_row_mapC, entriesC, valuesC,
796 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
798 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
799 kh.destroy_spgemm_handle();
802 kh.create_spgemm_handle(alg_enum);
803 kh.set_team_work_size(team_work_size);
805 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
806 Amat.graph.row_map, Amat.graph.entries,
false,
807 Bmerged.graph.row_map, Bmerged.graph.entries,
false,
810 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
812 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
813 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
815 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
816 Amat.graph.row_map, Amat.graph.entries, Amat.values,
false,
817 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false,
818 row_mapC, entriesC, valuesC,
819 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
820 kh.destroy_spgemm_handle();
823#ifdef HAVE_TPETRA_MMM_TIMINGS
824 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLSort"))));
828 if (params.is_null() || params->get(
"sort entries",
true))
829 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
830 C.setAllValues(row_mapC,entriesC,valuesC);
832#ifdef HAVE_TPETRA_MMM_TIMINGS
833 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix SYCLESFC"))));
837 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
838 labelList->set(
"Timer Label",label);
839 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
840 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosSYCLWrapperNode> > dummyExport;
841 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.