10#ifndef TPETRA_MATRIXMATRIX_HIP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_HIP_DEF_HPP
13#include "Tpetra_Details_IntRowPtrHelper.hpp"
15#ifdef HAVE_TPETRA_INST_HIP
24 class LocalOrdinalViewType>
25struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
26 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
27 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
28 const LocalOrdinalViewType & Acol2Brow,
29 const LocalOrdinalViewType & Acol2Irow,
30 const LocalOrdinalViewType & Bcol2Ccol,
31 const LocalOrdinalViewType & Icol2Ccol,
32 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
33 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
34 const std::string& label = std::string(),
35 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
39 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
40 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
41 const LocalOrdinalViewType & Acol2Brow,
42 const LocalOrdinalViewType & Acol2Irow,
43 const LocalOrdinalViewType & Bcol2Ccol,
44 const LocalOrdinalViewType & Icol2Ccol,
45 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
46 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
47 const std::string& label = std::string(),
48 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
55 class GlobalOrdinal,
class LocalOrdinalViewType>
56struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType> {
57 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
58 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
59 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
66 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
70 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
71 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
72 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
79 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
83 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
84 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
85 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
86 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
87 const LocalOrdinalViewType & Acol2Brow,
88 const LocalOrdinalViewType & Acol2Irow,
89 const LocalOrdinalViewType & Bcol2Ccol,
90 const LocalOrdinalViewType & Icol2Ccol,
91 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
92 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
93 const std::string& label = std::string(),
94 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
100template<
class Scalar,
103 class LocalOrdinalViewType>
104void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
106 const LocalOrdinalViewType & Acol2Brow,
107 const LocalOrdinalViewType & Acol2Irow,
108 const LocalOrdinalViewType & Bcol2Ccol,
109 const LocalOrdinalViewType & Icol2Ccol,
110 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
111 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
112 const std::string& label,
113 const Teuchos::RCP<Teuchos::ParameterList>& params) {
115#ifdef HAVE_TPETRA_MMM_TIMINGS
116 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
117 using Teuchos::TimeMonitor;
118 Teuchos::RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPWrapper")))));
121 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
122 std::string nodename(
"HIP");
127 typedef typename KCRS::device_type device_t;
128 typedef typename KCRS::StaticCrsGraphType graph_t;
129 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
130 using int_view_t = Kokkos::View<int *, typename lno_view_t::array_layout, typename lno_view_t::memory_space, typename lno_view_t::memory_traits>;
131 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
132 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
133 typedef typename KCRS::values_type::non_const_type scalar_view_t;
137 int team_work_size = 16;
138 std::string myalg(
"SPGEMM_KK_MEMORY");
139 if(!params.is_null()) {
140 if(params->isParameter(
"hip: algorithm"))
141 myalg = params->get(
"hip: algorithm",myalg);
142 if(params->isParameter(
"hip: team work size"))
143 team_work_size = params->get(
"hip: team work size",team_work_size);
147 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
148 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
149 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space > KernelHandle;
150 using IntKernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<
151 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
152 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
155 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
156 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
158 c_lno_view_t Arowptr = Amat.graph.row_map,
159 Browptr = Bmat.graph.row_map;
160 const lno_nnz_view_t Acolind = Amat.graph.entries,
161 Bcolind = Bmat.graph.entries;
162 const scalar_view_t Avals = Amat.values,
166 std::string alg = nodename+std::string(
" algorithm");
168 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
169 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
172 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
174#ifdef HAVE_TPETRA_MMM_TIMINGS
175 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPCore"))));
179 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
180 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
181 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
184 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_lno_row"), AnumRows + 1);
185 lno_nnz_view_t entriesC;
186 scalar_view_t valuesC;
188 Tpetra::Details::IntRowPtrHelper<
decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
189 const bool useIntRowptrs =
190 irph.shouldUseIntRowptrs() &&
191 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
195 kh.create_spgemm_handle(alg_enum);
196 kh.set_team_work_size(team_work_size);
198 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"non_const_int_row"), AnumRows + 1);
200 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
201 auto Bint = irph.getIntRowptrMatrix(Bmerged);
202 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,
false,Bint.graph.row_map,Bint.graph.entries,
false, int_row_mapC);
204 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
206 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
207 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
209 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Aint.graph.row_map,Aint.graph.entries,Aint.values,
false,Bint.graph.row_map,Bint.graph.entries,Bint.values,
false,int_row_mapC,entriesC,valuesC);
211 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
212 kh.destroy_spgemm_handle();
215 kh.create_spgemm_handle(alg_enum);
216 kh.set_team_work_size(team_work_size);
218 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,
false,Bmerged.graph.row_map,Bmerged.graph.entries,
false,row_mapC);
220 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
222 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
223 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
226 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,
false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,
false,row_mapC,entriesC,valuesC);
227 kh.destroy_spgemm_handle();
230#ifdef HAVE_TPETRA_MMM_TIMINGS
231 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPSort"))));
235 if (params.is_null() || params->get(
"sort entries",
true))
236 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
237 C.setAllValues(row_mapC,entriesC,valuesC);
239#ifdef HAVE_TPETRA_MMM_TIMINGS
240 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix HIPESFC"))));
244 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
245 labelList->set(
"Timer Label",label);
246 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
247 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
248 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
253template<
class Scalar,
256 class LocalOrdinalViewType>
257void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
258 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
259 const LocalOrdinalViewType & targetMapToOrigRow_dev,
260 const LocalOrdinalViewType & targetMapToImportRow_dev,
261 const LocalOrdinalViewType & Bcol2Ccol_dev,
262 const LocalOrdinalViewType & Icol2Ccol_dev,
263 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
264 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
265 const std::string& label,
266 const Teuchos::RCP<Teuchos::ParameterList>& params) {
269 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
271#ifdef HAVE_TPETRA_MMM_TIMINGS
272 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
273 using Teuchos::TimeMonitor;
274 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
275 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
282 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
283 typedef typename KCRS::StaticCrsGraphType graph_t;
284 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
285 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
286 typedef typename KCRS::values_type::non_const_type scalar_view_t;
289 typedef LocalOrdinal LO;
290 typedef GlobalOrdinal GO;
292 typedef Map<LO,GO,NO> map_type;
293 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
294 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
295 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
300 auto targetMapToOrigRow =
301 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
302 targetMapToOrigRow_dev);
303 auto targetMapToImportRow =
304 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
305 targetMapToImportRow_dev);
307 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
310 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
314 RCP<const map_type> Ccolmap = C.getColMap();
315 size_t m = Aview.origMatrix->getLocalNumRows();
316 size_t n = Ccolmap->getLocalNumElements();
319 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
320 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
321 const KCRS & Cmat = C.getLocalMatrixHost();
323 c_lno_view_t Arowptr = Amat.graph.row_map,
324 Browptr = Bmat.graph.row_map,
325 Crowptr = Cmat.graph.row_map;
326 const lno_nnz_view_t Acolind = Amat.graph.entries,
327 Bcolind = Bmat.graph.entries,
328 Ccolind = Cmat.graph.entries;
329 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
330 scalar_view_t Cvals = Cmat.values;
332 c_lno_view_t Irowptr;
333 lno_nnz_view_t Icolind;
335 if(!Bview.importMatrix.is_null()) {
336 auto lclB = Bview.importMatrix->getLocalMatrixHost();
337 Irowptr = lclB.graph.row_map;
338 Icolind = lclB.graph.entries;
342#ifdef HAVE_TPETRA_MMM_TIMINGS
343 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
355 std::vector<size_t> c_status(n, ST_INVALID);
358 size_t CSR_ip = 0, OLD_ip = 0;
359 for (
size_t i = 0; i < m; i++) {
363 CSR_ip = Crowptr[i+1];
364 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
365 c_status[Ccolind[k]] = k;
371 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
373 const SC Aval = Avals[k];
377 if (targetMapToOrigRow[Aik] != LO_INVALID) {
379 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
381 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
383 LO Cij = Bcol2Ccol[Bkj];
385 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
386 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
387 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
389 Cvals[c_status[Cij]] += Aval * Bvals[j];
394 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
395 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
397 LO Cij = Icol2Ccol[Ikj];
399 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
400 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
401 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
403 Cvals[c_status[Cij]] += Aval * Ivals[j];
409 C.fillComplete(C.getDomainMap(), C.getRangeMap());
413template<
class Scalar,
416 class LocalOrdinalViewType>
417void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
418 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
419 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
420 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
421 const LocalOrdinalViewType & Acol2Brow,
422 const LocalOrdinalViewType & Acol2Irow,
423 const LocalOrdinalViewType & Bcol2Ccol,
424 const LocalOrdinalViewType & Icol2Ccol,
425 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
426 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
427 const std::string& label,
428 const Teuchos::RCP<Teuchos::ParameterList>& params) {
430#ifdef HAVE_TPETRA_MMM_TIMINGS
431 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
432 using Teuchos::TimeMonitor;
433 Teuchos::RCP<TimeMonitor> MM;
441 std::string myalg(
"KK");
442 if(!params.is_null()) {
443 if(params->isParameter(
"hip: jacobi algorithm"))
444 myalg = params->get(
"hip: jacobi algorithm",myalg);
447 if(myalg ==
"MSAK") {
448 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
450 else if(myalg ==
"KK") {
451 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
454 throw std::runtime_error(
"Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
457#ifdef HAVE_TPETRA_MMM_TIMINGS
458 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
462 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
463 labelList->set(
"Timer Label",label);
464 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
467 if(!C.isFillComplete()) {
468 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
469 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
477template<
class Scalar,
480 class LocalOrdinalViewType>
481void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
482 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
483 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
484 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
485 const LocalOrdinalViewType & targetMapToOrigRow_dev,
486 const LocalOrdinalViewType & targetMapToImportRow_dev,
487 const LocalOrdinalViewType & Bcol2Ccol_dev,
488 const LocalOrdinalViewType & Icol2Ccol_dev,
489 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
490 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
491 const std::string& label,
492 const Teuchos::RCP<Teuchos::ParameterList>& params) {
495 typedef Tpetra::KokkosCompat::KokkosHIPWrapperNode Node;
497#ifdef HAVE_TPETRA_MMM_TIMINGS
498 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
499 using Teuchos::TimeMonitor;
500 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore"))));
501 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
507 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
508 typedef typename KCRS::StaticCrsGraphType graph_t;
509 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
510 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
511 typedef typename KCRS::values_type::non_const_type scalar_view_t;
512 typedef typename scalar_view_t::memory_space scalar_memory_space;
515 typedef LocalOrdinal LO;
516 typedef GlobalOrdinal GO;
518 typedef Map<LO,GO,NO> map_type;
519 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
520 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
521 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
526 auto targetMapToOrigRow =
527 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
528 targetMapToOrigRow_dev);
529 auto targetMapToImportRow =
530 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
531 targetMapToImportRow_dev);
533 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
536 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
541 RCP<const map_type> Ccolmap = C.getColMap();
542 size_t m = Aview.origMatrix->getLocalNumRows();
543 size_t n = Ccolmap->getLocalNumElements();
546 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
547 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
548 const KCRS & Cmat = C.getLocalMatrixHost();
550 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
551 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
552 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
553 scalar_view_t Cvals = Cmat.values;
555 c_lno_view_t Irowptr;
556 lno_nnz_view_t Icolind;
558 if(!Bview.importMatrix.is_null()) {
559 auto lclB = Bview.importMatrix->getLocalMatrixHost();
560 Irowptr = lclB.graph.row_map;
561 Icolind = lclB.graph.entries;
567 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
569#ifdef HAVE_TPETRA_MMM_TIMINGS
570 MM2 = Teuchos::null; MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse HIPCore - Compare"))));
578 std::vector<size_t> c_status(n, ST_INVALID);
581 size_t CSR_ip = 0, OLD_ip = 0;
582 for (
size_t i = 0; i < m; i++) {
587 CSR_ip = Crowptr[i+1];
588 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
589 c_status[Ccolind[k]] = k;
595 SC minusOmegaDval = -omega*Dvals(i,0);
598 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
599 Scalar Bval = Bvals[j];
603 LO Cij = Bcol2Ccol[Bij];
605 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
606 std::runtime_error,
"Trying to insert a new entry into a static graph");
608 Cvals[c_status[Cij]] = Bvals[j];
612 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
614 const SC Aval = Avals[k];
618 if (targetMapToOrigRow[Aik] != LO_INVALID) {
620 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
622 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
624 LO Cij = Bcol2Ccol[Bkj];
626 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
627 std::runtime_error,
"Trying to insert a new entry into a static graph");
629 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
634 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
635 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
637 LO Cij = Icol2Ccol[Ikj];
639 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
640 std::runtime_error,
"Trying to insert a new entry into a static graph");
642 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
648#ifdef HAVE_TPETRA_MMM_TIMINGS
650 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
653 C.fillComplete(C.getDomainMap(), C.getRangeMap());
658template<
class Scalar,
661 class LocalOrdinalViewType>
662void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
663 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> & Dinv,
664 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Aview,
665 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& Bview,
666 const LocalOrdinalViewType & Acol2Brow,
667 const LocalOrdinalViewType & Acol2Irow,
668 const LocalOrdinalViewType & Bcol2Ccol,
669 const LocalOrdinalViewType & Icol2Ccol,
670 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosHIPWrapperNode>& C,
671 Teuchos::RCP<
const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > Cimport,
672 const std::string& label,
673 const Teuchos::RCP<Teuchos::ParameterList>& params) {
675#ifdef HAVE_TPETRA_MMM_TIMINGS
676 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
677 using Teuchos::TimeMonitor;
678 Teuchos::RCP<TimeMonitor> MM;
685 auto rowMap = Aview.origMatrix->getRowMap();
686 Tpetra::Vector<Scalar> diags(rowMap);
687 Aview.origMatrix->getLocalDiagCopy(diags);
688 size_t diagLength = rowMap->getLocalNumElements();
689 Teuchos::Array<Scalar> diagonal(diagLength);
690 diags.get1dCopy(diagonal());
692 for(
size_t i = 0; i < diagLength; ++i) {
693 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
695 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
696 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
701 using device_t =
typename Tpetra::KokkosCompat::KokkosHIPWrapperNode::device_type;
703 using graph_t =
typename matrix_t::StaticCrsGraphType;
704 using lno_view_t =
typename graph_t::row_map_type::non_const_type;
705 using int_view_t = Kokkos::View<
int *,
706 typename lno_view_t::array_layout,
707 typename lno_view_t::memory_space,
708 typename lno_view_t::memory_traits>;
709 using c_lno_view_t =
typename graph_t::row_map_type::const_type;
710 using lno_nnz_view_t =
typename graph_t::entries_type::non_const_type;
711 using scalar_view_t =
typename matrix_t::values_type::non_const_type;
714 using handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
715 typename lno_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
716 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
717 using int_handle_t =
typename KokkosKernels::Experimental::KokkosKernelsHandle<
718 typename int_view_t::const_value_type,
typename lno_nnz_view_t::const_value_type,
typename scalar_view_t::const_value_type,
719 typename device_t::execution_space,
typename device_t::memory_space,
typename device_t::memory_space >;
722 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
725 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
726 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
728 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
729 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
730 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
732 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
733 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
734 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
737 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"row_mapC"), AnumRows + 1);
738 lno_nnz_view_t entriesC;
739 scalar_view_t valuesC;
742 int team_work_size = 16;
743 std::string myalg(
"SPGEMM_KK_MEMORY");
744 if(!params.is_null()) {
745 if(params->isParameter(
"hip: algorithm"))
746 myalg = params->get(
"hip: algorithm",myalg);
747 if(params->isParameter(
"hip: team work size"))
748 team_work_size = params->get(
"hip: team work size",team_work_size);
752 std::string nodename(
"HIP");
753 std::string alg = nodename + std::string(
" algorithm");
754 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
755 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
757 Tpetra::Details::IntRowPtrHelper<
decltype(Bmerged)> irph(Bmerged.nnz(), Bmerged.graph.row_map);
758 const bool useIntRowptrs =
759 irph.shouldUseIntRowptrs() &&
760 Aview.origMatrix->getApplyHelper()->shouldUseIntRowptrs();
764 kh.create_spgemm_handle(alg_enum);
765 kh.set_team_work_size(team_work_size);
767 int_view_t int_row_mapC (Kokkos::ViewAllocateWithoutInitializing(
"int_row_mapC"), AnumRows + 1);
769 auto Aint = Aview.origMatrix->getApplyHelper()->getIntRowptrMatrix(Amat);
770 auto Bint = irph.getIntRowptrMatrix(Bmerged);
772 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
773 Aint.graph.row_map, Aint.graph.entries,
false,
774 Bint.graph.row_map, Bint.graph.entries,
false,
777 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
779 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
780 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
785 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
786 Aint.graph.row_map, Aint.graph.entries, Amat.values,
false,
787 Bint.graph.row_map, Bint.graph.entries, Bint.values,
false,
788 int_row_mapC, entriesC, valuesC,
789 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
791 Kokkos::parallel_for(int_row_mapC.size(), KOKKOS_LAMBDA(
int i){ row_mapC(i) = int_row_mapC(i);});
792 kh.destroy_spgemm_handle();
795 kh.create_spgemm_handle(alg_enum);
796 kh.set_team_work_size(team_work_size);
798 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
799 Amat.graph.row_map, Amat.graph.entries,
false,
800 Bmerged.graph.row_map, Bmerged.graph.entries,
false,
803 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
805 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing(
"entriesC"), c_nnz_size);
806 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing(
"valuesC"), c_nnz_size);
808 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
809 Amat.graph.row_map, Amat.graph.entries, Amat.values,
false,
810 Bmerged.graph.row_map, Bmerged.graph.entries, Bmerged.values,
false,
811 row_mapC, entriesC, valuesC,
812 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
813 kh.destroy_spgemm_handle();
816#ifdef HAVE_TPETRA_MMM_TIMINGS
817 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPSort"))));
821 if (params.is_null() || params->get(
"sort entries",
true))
822 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
823 C.setAllValues(row_mapC,entriesC,valuesC);
825#ifdef HAVE_TPETRA_MMM_TIMINGS
826 MM = Teuchos::null; MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix HIPESFC"))));
830 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
831 labelList->set(
"Timer Label",label);
832 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
833 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosHIPWrapperNode> > dummyExport;
834 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.