Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_OpenMP.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
11#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
12
13#ifdef HAVE_TPETRA_INST_OPENMP
14namespace Tpetra {
15namespace MMdetails {
16
17/*********************************************************************************************************/
18// MMM KernelWrappers for Partial Specialization to OpenMP
19template<class Scalar,
20 class LocalOrdinal,
21 class GlobalOrdinal, class LocalOrdinalViewType>
22struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
23 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
24 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
25 const LocalOrdinalViewType & Acol2Brow,
26 const LocalOrdinalViewType & Acol2Irow,
27 const LocalOrdinalViewType & Bcol2Ccol,
28 const LocalOrdinalViewType & Icol2Ccol,
29 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
30 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
31 const std::string& label = std::string(),
32 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
33
34 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
35 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
36 const LocalOrdinalViewType & Acol2Brow,
37 const LocalOrdinalViewType & Acol2Irow,
38 const LocalOrdinalViewType & Bcol2Ccol,
39 const LocalOrdinalViewType & Icol2Ccol,
40 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
41 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
42 const std::string& label = std::string(),
43 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
44
45
46};
47
48
49// Jacobi KernelWrappers for Partial Specialization to OpenMP
50template<class Scalar,
51 class LocalOrdinal,
52 class GlobalOrdinal, class LocalOrdinalViewType>
53struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
54 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
55 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
58 const LocalOrdinalViewType & Acol2Brow,
59 const LocalOrdinalViewType & Acol2Irow,
60 const LocalOrdinalViewType & Bcol2Ccol,
61 const LocalOrdinalViewType & Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
63 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
64 const std::string& label = std::string(),
65 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
66
67 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
68 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
70 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
71 const LocalOrdinalViewType & Acol2Brow,
72 const LocalOrdinalViewType & Acol2Irow,
73 const LocalOrdinalViewType & Bcol2Ccol,
74 const LocalOrdinalViewType & Icol2Ccol,
75 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
76 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
77 const std::string& label = std::string(),
78 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
79
80 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
81 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
84 const LocalOrdinalViewType & Acol2Brow,
85 const LocalOrdinalViewType & Acol2Irow,
86 const LocalOrdinalViewType & Bcol2Ccol,
87 const LocalOrdinalViewType & Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
89 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
90 const std::string& label = std::string(),
91 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
92
93};
94
95
96// Triple-Product KernelWrappers for Partial Specialization to OpenMP
97template<class Scalar,
98 class LocalOrdinal,
99 class GlobalOrdinal, class LocalOrdinalViewType>
100struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
101 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
103 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
104 const LocalOrdinalViewType & Acol2Prow,
105 const LocalOrdinalViewType & Acol2PIrow,
106 const LocalOrdinalViewType & Pcol2Ccol,
107 const LocalOrdinalViewType & PIcol2Ccol,
108 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
109 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
110 const std::string& label = std::string(),
111 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
112
113static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
114 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
116 const LocalOrdinalViewType & Acol2Prow,
117 const LocalOrdinalViewType & Acol2PIrow,
118 const LocalOrdinalViewType & Pcol2Ccol,
119 const LocalOrdinalViewType & PIcol2Ccol,
120 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
121 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
122 const std::string& label = std::string(),
123 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
124
125 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
126 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
127 const LocalOrdinalViewType & Acol2Prow,
128 const LocalOrdinalViewType & Acol2PIrow,
129 const LocalOrdinalViewType & Pcol2Ccol,
130 const LocalOrdinalViewType & PIcol2Ccol,
131 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
132 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
133 const std::string& label = std::string(),
134 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
135
136 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
137 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
138 const LocalOrdinalViewType & Acol2Prow,
139 const LocalOrdinalViewType & Acol2PIrow,
140 const LocalOrdinalViewType & Pcol2Ccol,
141 const LocalOrdinalViewType & PIcol2Ccol,
142 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
143 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
144 const std::string& label = std::string(),
145 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
146};
147
148
149/*********************************************************************************************************/
150template<class Scalar,
151 class LocalOrdinal,
152 class GlobalOrdinal,
153 class LocalOrdinalViewType>
154void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
155 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
156 const LocalOrdinalViewType & Acol2Brow,
157 const LocalOrdinalViewType & Acol2Irow,
158 const LocalOrdinalViewType & Bcol2Ccol,
159 const LocalOrdinalViewType & Icol2Ccol,
160 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
161 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
162 const std::string& label,
163 const Teuchos::RCP<Teuchos::ParameterList>& params) {
164
165#ifdef HAVE_TPETRA_MMM_TIMINGS
166 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
167 using Teuchos::TimeMonitor;
168 Teuchos::RCP<TimeMonitor> MM;
169#endif
170
171 // Node-specific code
172 std::string nodename("OpenMP");
173
174 // Lots and lots of typedefs
175 using Teuchos::RCP;
177 typedef typename KCRS::device_type device_t;
178 typedef typename KCRS::StaticCrsGraphType graph_t;
179 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
180 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
181 typedef typename KCRS::values_type::non_const_type scalar_view_t;
182
183 // Options
184 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
185 std::string myalg("SPGEMM_KK_MEMORY");
186
187
188 if(!params.is_null()) {
189 if(params->isParameter("openmp: algorithm"))
190 myalg = params->get("openmp: algorithm",myalg);
191 if(params->isParameter("openmp: team work size"))
192 team_work_size = params->get("openmp: team work size",team_work_size);
193 }
194
195 if(myalg == "LTG") {
196 // Use the LTG kernel if requested
197 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
198 }
199 else {
200 // Use the Kokkos-Kernels OpenMP Kernel
201#ifdef HAVE_TPETRA_MMM_TIMINGS
202 MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
203#endif
204 // KokkosKernelsHandle
205 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
206 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
207 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
208
209 // Grab the Kokkos::SparseCrsMatrices
210 const KCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
211 // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
212
213 // Get the algorithm mode
214 std::string alg = nodename+std::string(" algorithm");
215 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
216 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
217 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
218
219 // Merge the B and Bimport matrices
220 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
221
222#ifdef HAVE_TPETRA_MMM_TIMINGS
223 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
224#endif
225
226 // Do the multiply on whatever we've got
227 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
228 // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
229 // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
230 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
231 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
232
233
234 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
235 lno_nnz_view_t entriesC;
236 scalar_view_t valuesC;
237 KernelHandle kh;
238 kh.create_spgemm_handle(alg_enum);
239 kh.set_team_work_size(team_work_size);
240 // KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
241 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
242
243 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
244 if (c_nnz_size){
245 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
246 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
247 }
248 // KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
249 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
250 kh.destroy_spgemm_handle();
251
252#ifdef HAVE_TPETRA_MMM_TIMINGS
253 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
254#endif
255 // Sort & set values
256 if (params.is_null() || params->get("sort entries",true))
257 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
258 C.setAllValues(row_mapC,entriesC,valuesC);
259
260 }// end OMP KokkosKernels loop
261
262#ifdef HAVE_TPETRA_MMM_TIMINGS
263 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
264#endif
265
266 // Final Fillcomplete
267 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
268 labelList->set("Timer Label",label);
269 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
270 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
271 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
272
273#if 0
274 {
275 Teuchos::ArrayRCP< const size_t > Crowptr;
276 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
277 Teuchos::ArrayRCP< const Scalar > Cvalues;
278 C.getAllValues(Crowptr,Ccolind,Cvalues);
279
280 // DEBUG
281 int MyPID = C->getComm()->getRank();
282 printf("[%d] Crowptr = ",MyPID);
283 for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
284 printf("%3d ",(int)Crowptr.getConst()[i]);
285 }
286 printf("\n");
287 printf("[%d] Ccolind = ",MyPID);
288 for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
289 printf("%3d ",(int)Ccolind.getConst()[i]);
290 }
291 printf("\n");
292 fflush(stdout);
293 // END DEBUG
294 }
295#endif
296}
297
298
299/*********************************************************************************************************/
300template<class Scalar,
301 class LocalOrdinal,
302 class GlobalOrdinal,
303 class LocalOrdinalViewType>
304void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
305 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
306 const LocalOrdinalViewType & Acol2Brow,
307 const LocalOrdinalViewType & Acol2Irow,
308 const LocalOrdinalViewType & Bcol2Ccol,
309 const LocalOrdinalViewType & Icol2Ccol,
310 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
311 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
312 const std::string& label,
313 const Teuchos::RCP<Teuchos::ParameterList>& params) {
314#ifdef HAVE_TPETRA_MMM_TIMINGS
315 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
316 using Teuchos::TimeMonitor;
317 Teuchos::RCP<TimeMonitor> MM;
318#endif
319
320 // Lots and lots of typedefs
321 using Teuchos::RCP;
322
323 // Options
324 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
325 std::string myalg("LTG");
326 if(!params.is_null()) {
327 if(params->isParameter("openmp: algorithm"))
328 myalg = params->get("openmp: algorithm",myalg);
329 if(params->isParameter("openmp: team work size"))
330 team_work_size = params->get("openmp: team work size",team_work_size);
331 }
332
333 if(myalg == "LTG") {
334 // Use the LTG kernel if requested
335 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
336 }
337 else {
338 throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
339 }
340
341#ifdef HAVE_TPETRA_MMM_TIMINGS
342 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
343#endif
344 C.fillComplete(C.getDomainMap(), C.getRangeMap());
345}
346
347
348/*********************************************************************************************************/
349template<class Scalar,
350 class LocalOrdinal,
351 class GlobalOrdinal,
352 class LocalOrdinalViewType>
353void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
354 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
355 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
356 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
357 const LocalOrdinalViewType & Acol2Brow,
358 const LocalOrdinalViewType & Acol2Irow,
359 const LocalOrdinalViewType & Bcol2Ccol,
360 const LocalOrdinalViewType & Icol2Ccol,
361 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
362 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
363 const std::string& label,
364 const Teuchos::RCP<Teuchos::ParameterList>& params) {
365
366#ifdef HAVE_TPETRA_MMM_TIMINGS
367 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
368 using Teuchos::TimeMonitor;
369 Teuchos::RCP<TimeMonitor> MM;
370#endif
371
372 // Node-specific code
373 using Teuchos::RCP;
374
375 // Options
376 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
377 std::string myalg("LTG");
378 if(!params.is_null()) {
379 if(params->isParameter("openmp: jacobi algorithm"))
380 myalg = params->get("openmp: jacobi algorithm",myalg);
381 if(params->isParameter("openmp: team work size"))
382 team_work_size = params->get("openmp: team work size",team_work_size);
383 }
384
385 if(myalg == "LTG") {
386 // Use the LTG kernel if requested
387 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
388 }
389 else if(myalg == "MSAK") {
390 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
391 }
392 else if(myalg == "KK") {
393 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
394 }
395 else {
396 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
397 }
398
399#ifdef HAVE_TPETRA_MMM_TIMINGS
400 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
401#endif
402
403 // Final Fillcomplete
404 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
405 labelList->set("Timer Label",label);
406 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
407
408 // NOTE: MSAK already fillCompletes, so we have to check here
409 if(!C.isFillComplete()) {
410 RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
411 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
412 }
413
414}
415
416
417
418/*********************************************************************************************************/
419template<class Scalar,
420 class LocalOrdinal,
421 class GlobalOrdinal,
422 class LocalOrdinalViewType>
423void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
424 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
425 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
426 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
427 const LocalOrdinalViewType & Acol2Brow,
428 const LocalOrdinalViewType & Acol2Irow,
429 const LocalOrdinalViewType & Bcol2Ccol,
430 const LocalOrdinalViewType & Icol2Ccol,
431 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
432 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
433 const std::string& label,
434 const Teuchos::RCP<Teuchos::ParameterList>& params) {
435
436#ifdef HAVE_TPETRA_MMM_TIMINGS
437 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
438 using Teuchos::TimeMonitor;
439 Teuchos::RCP<TimeMonitor> MM;
440#endif
441
442 // Lots and lots of typedefs
443 using Teuchos::RCP;
444
445 // Options
446 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
447 std::string myalg("LTG");
448 if(!params.is_null()) {
449 if(params->isParameter("openmp: jacobi algorithm"))
450 myalg = params->get("openmp: jacobi algorithm",myalg);
451 if(params->isParameter("openmp: team work size"))
452 team_work_size = params->get("openmp: team work size",team_work_size);
453 }
454
455 if(myalg == "LTG") {
456 // Use the LTG kernel if requested
457 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
458 }
459 else {
460 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
461 }
462
463#ifdef HAVE_TPETRA_MMM_TIMINGS
464 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
465#endif
466 C.fillComplete(C.getDomainMap(), C.getRangeMap());
467
468}
469
470/*********************************************************************************************************/
471template<class Scalar,
472 class LocalOrdinal,
473 class GlobalOrdinal,
474 class LocalOrdinalViewType>
475void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
476 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
477 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
478 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
479 const LocalOrdinalViewType & Acol2Brow,
480 const LocalOrdinalViewType & Acol2Irow,
481 const LocalOrdinalViewType & Bcol2Ccol,
482 const LocalOrdinalViewType & Icol2Ccol,
483 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
484 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
485 const std::string& label,
486 const Teuchos::RCP<Teuchos::ParameterList>& params) {
487
488#ifdef HAVE_TPETRA_MMM_TIMINGS
489 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
490 using Teuchos::TimeMonitor;
491 Teuchos::RCP<TimeMonitor> MM;
492#endif
493
494 // Check if the diagonal entries exist in debug mode
495 const bool debug = Tpetra::Details::Behavior::debug();
496 if(debug) {
497
498 auto rowMap = Aview.origMatrix->getRowMap();
499 Tpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> diags(rowMap);
500 Aview.origMatrix->getLocalDiagCopy(diags);
501 size_t diagLength = rowMap->getLocalNumElements();
502 Teuchos::Array<Scalar> diagonal(diagLength);
503 diags.get1dCopy(diagonal());
504
505 for(size_t i = 0; i < diagLength; ++i) {
506 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
507 std::runtime_error,
508 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
509 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
510 }
511 }
512
513 // Usings
514 using device_t = typename Tpetra::KokkosCompat::KokkosOpenMPWrapperNode::device_type;
516 using graph_t = typename matrix_t::StaticCrsGraphType;
517 using lno_view_t = typename graph_t::row_map_type::non_const_type;
518 using c_lno_view_t = typename graph_t::row_map_type::const_type;
519 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
520 using scalar_view_t = typename matrix_t::values_type::non_const_type;
521
522 // KokkosKernels handle
523 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
524 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
525 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
526
527 // Get the rowPtr, colInd and vals of importMatrix
528 c_lno_view_t Irowptr;
529 lno_nnz_view_t Icolind;
530 scalar_view_t Ivals;
531 if(!Bview.importMatrix.is_null()) {
532 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
533 Irowptr = lclB.graph.row_map;
534 Icolind = lclB.graph.entries;
535 Ivals = lclB.values;
536 }
537
538 // Merge the B and Bimport matrices
539 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
540
541 // Get the properties and arrays of input matrices
542 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
543 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
544
545 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
546 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
547 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
548
549 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
550 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
551 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
552
553 // Arrays of the output matrix
554 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
555 lno_nnz_view_t entriesC;
556 scalar_view_t valuesC;
557
558 // Options
559 int team_work_size = 16;
560 std::string myalg("SPGEMM_KK_MEMORY");
561 if(!params.is_null()) {
562 if(params->isParameter("cuda: algorithm"))
563 myalg = params->get("cuda: algorithm",myalg);
564 if(params->isParameter("cuda: team work size"))
565 team_work_size = params->get("cuda: team work size",team_work_size);
566 }
567
568 // Get the algorithm mode
569 std::string nodename("OpenMP");
570 std::string alg = nodename + std::string(" algorithm");
571 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
572 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
573
574
575 // KokkosKernels call
576 handle_t kh;
577 kh.create_spgemm_handle(alg_enum);
578 kh.set_team_work_size(team_work_size);
579
580 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
581 Arowptr, Acolind, false,
582 Browptr, Bcolind, false,
583 row_mapC);
584
585 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
586 if (c_nnz_size){
587 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
588 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
589 }
590
591 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
592 Arowptr, Acolind, Avals, false,
593 Browptr, Bcolind, Bvals, false,
594 row_mapC, entriesC, valuesC,
595 omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
596 kh.destroy_spgemm_handle();
597
598#ifdef HAVE_TPETRA_MMM_TIMINGS
599 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
600#endif
601
602 // Sort & set values
603 if (params.is_null() || params->get("sort entries",true))
604 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
605 C.setAllValues(row_mapC,entriesC,valuesC);
606
607#ifdef HAVE_TPETRA_MMM_TIMINGS
608 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
609#endif
610
611 // Final Fillcomplete
612 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
613 labelList->set("Timer Label",label);
614 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
615 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > dummyExport;
616 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
617}
618
619
620/*********************************************************************************************************/
621template<class Scalar,
622 class LocalOrdinal,
623 class GlobalOrdinal,
624 class LocalOrdinalViewType>
625void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
626 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
627 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
628 const LocalOrdinalViewType & Acol2Prow,
629 const LocalOrdinalViewType & Acol2PIrow,
630 const LocalOrdinalViewType & Pcol2Accol,
631 const LocalOrdinalViewType & PIcol2Accol,
632 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
633 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
634 const std::string& label,
635 const Teuchos::RCP<Teuchos::ParameterList>& params) {
636
637
638
639#ifdef HAVE_TPETRA_MMM_TIMINGS
640 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
641 using Teuchos::TimeMonitor;
642 Teuchos::RCP<TimeMonitor> MM;
643#endif
644
645 // Node-specific code
646 std::string nodename("OpenMP");
647
648 // Options
649 std::string myalg("LTG");
650
651 if(!params.is_null()) {
652 if(params->isParameter("openmp: rap algorithm"))
653 myalg = params->get("openmp: rap algorithm",myalg);
654 }
655
656 if(myalg == "LTG") {
657 // Use the LTG kernel if requested
658 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
659 }
660 else {
661 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
662 }
663}
664
665/*********************************************************************************************************/
666template<class Scalar,
667 class LocalOrdinal,
668 class GlobalOrdinal,
669 class LocalOrdinalViewType>
670void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
671 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
672 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
673 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
674
675 const LocalOrdinalViewType & Acol2Prow,
676 const LocalOrdinalViewType & Acol2Irow,
677 const LocalOrdinalViewType & Pcol2Ccol,
678 const LocalOrdinalViewType & Icol2Ccol,
679 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
680 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
681 const std::string& label,
682 const Teuchos::RCP<Teuchos::ParameterList>& params) {
683
684#ifdef HAVE_TPETRA_MMM_TIMINGS
685 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
686 using Teuchos::TimeMonitor;
687 Teuchos::RCP<TimeMonitor> MM;
688#endif
689
690 // Lots and lots of typedefs
691 using Teuchos::RCP;
692
693 // Options
694 std::string myalg("LTG");
695 if(!params.is_null()) {
696 if(params->isParameter("openmp: rap algorithm"))
697 myalg = params->get("openmp: rap algorithm",myalg);
698 }
699
700 if(myalg == "LTG") {
701 // Use the LTG kernel if requested
702 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
703 }
704 else {
705 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
706 }
707
708#ifdef HAVE_TPETRA_MMM_TIMINGS
709 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
710#endif
711 C.fillComplete(C.getDomainMap(), C.getRangeMap());
712
713}
714
715
716
717/*********************************************************************************************************/
718template<class Scalar,
719 class LocalOrdinal,
720 class GlobalOrdinal,
721 class LocalOrdinalViewType>
722void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
723
724 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
725 const LocalOrdinalViewType & Acol2Prow,
726 const LocalOrdinalViewType & Acol2PIrow,
727 const LocalOrdinalViewType & Pcol2Accol,
728 const LocalOrdinalViewType & PIcol2Accol,
729 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
730 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
731 const std::string& label,
732 const Teuchos::RCP<Teuchos::ParameterList>& params) {
733
734
735#ifdef HAVE_TPETRA_MMM_TIMINGS
736 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
737 using Teuchos::TimeMonitor;
738 Teuchos::RCP<TimeMonitor> MM;
739#endif
740
741 // Node-specific code
742 std::string nodename("OpenMP");
743
744 // Options
745 std::string myalg("LTG");
746
747 if(!params.is_null()) {
748 if(params->isParameter("openmp: ptap algorithm"))
749 myalg = params->get("openmp: ptap algorithm",myalg);
750 }
751
752 if(myalg == "LTG") {
753#ifdef HAVE_TPETRA_MMM_TIMINGS
754 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
755#endif
756
757 using Teuchos::ParameterList;
758 using Teuchos::RCP;
759 using LO = LocalOrdinal;
760 using GO = GlobalOrdinal;
761 using SC = Scalar;
762
763 // We don't need a kernel-level PTAP, we just transpose here
764 using transposer_type =
765 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
766 transposer_type transposer (Pview.origMatrix, label + "XP: ");
767 RCP<ParameterList> transposeParams (new ParameterList);
768 if (! params.is_null ()) {
769 transposeParams->set ("compute global constants",
770 params->get ("compute global constants: temporaries",
771 false));
772 }
773 transposeParams->set ("sort", false);
774 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
775 transposer.createTransposeLocal (transposeParams);
776 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
777 Rview.origMatrix = Ptrans;
778
779 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
780 mult_R_A_P_newmatrix_LowThreadGustavsonKernel
781 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
782 PIcol2Accol, Ac, Acimport, label, params);
783 }
784 else {
785 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
786 }
787}
788
789/*********************************************************************************************************/
790template<class Scalar,
791 class LocalOrdinal,
792 class GlobalOrdinal,
793 class LocalOrdinalViewType>
794void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
795
796 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
797 const LocalOrdinalViewType & Acol2Prow,
798 const LocalOrdinalViewType & Acol2PIrow,
799 const LocalOrdinalViewType & Pcol2Accol,
800 const LocalOrdinalViewType & PIcol2Accol,
801 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
802 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
803 const std::string& label,
804 const Teuchos::RCP<Teuchos::ParameterList>& params) {
805
806
807#ifdef HAVE_TPETRA_MMM_TIMINGS
808 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
809 using Teuchos::TimeMonitor;
810 Teuchos::RCP<TimeMonitor> MM;
811#endif
812
813 // Node-specific code
814 std::string nodename("OpenMP");
815
816 // Options
817 std::string myalg("LTG");
818
819 if(!params.is_null()) {
820 if(params->isParameter("openmp: ptap algorithm"))
821 myalg = params->get("openmp: ptap algorithm",myalg);
822 }
823
824 if(myalg == "LTG") {
825#ifdef HAVE_TPETRA_MMM_TIMINGS
826 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
827#endif
828
829 using Teuchos::ParameterList;
830 using Teuchos::RCP;
831 using LO = LocalOrdinal;
832 using GO = GlobalOrdinal;
833 using SC = Scalar;
834
835 // We don't need a kernel-level PTAP, we just transpose here
836 using transposer_type =
837 RowMatrixTransposer<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>;
838 transposer_type transposer (Pview.origMatrix, label + "XP: ");
839 RCP<ParameterList> transposeParams (new ParameterList);
840 if (! params.is_null ()) {
841 transposeParams->set ("compute global constants",
842 params->get ("compute global constants: temporaries",
843 false));
844 }
845 transposeParams->set ("sort", false);
846 RCP<CrsMatrix<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Ptrans =
847 transposer.createTransposeLocal (transposeParams);
848 CrsMatrixStruct<SC, LO, GO, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> Rview;
849 Rview.origMatrix = Ptrans;
850
851 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
852 mult_R_A_P_reuse_LowThreadGustavsonKernel
853 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
854 PIcol2Accol, Ac, Acimport, label, params);
855 }
856 else {
857 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
858 }
859 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
860}
861
862
863}//MMdetails
864}//Tpetra
865
866#endif//OpenMP
867
868#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.