Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_ExtraKernels_decl.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
11#define TPETRA_MATRIXMATRIX_EXTRAKERNELS_DECL_HPP
13
14
15namespace Tpetra {
16
17namespace MatrixMatrix {
18
19 // This guy allows us to easily get an Unmanaged Kokkos View from a ManagedOne
20 template <typename View>
21 using UnmanagedView = Kokkos::View< typename View::data_type
22 , typename View::array_layout
23 , typename View::device_type
24 , typename Kokkos::MemoryTraits< Kokkos::Unmanaged>
25 >;
26
27 namespace ExtraKernels {
28
29 template<class CrsMatrixType>
30 size_t C_estimate_nnz_per_row(CrsMatrixType & A, CrsMatrixType &B);
31
32 // 2019 Apr 10 JJE:
33 // copies data from thread local chunks into a unified CSR structure
34 // 'const' on the inCol and inVals array is a lie. The routine will deallocate
35 // the thread local storage. Maybe they shouldn't be const. Or mark, non-const
36 // and have a helper function for the actual copies that takes these as const
37 // . The point of const is that we want the loops to optimize assuming the
38 // RHS is unchanging
39 template<class InColindArrayType,
40 class InValsArrayType,
41 class OutRowptrType,
42 class OutColindType,
43 class OutValsType>
44 void copy_out_from_thread_memory(const OutColindType& thread_total_nnz,
45 const InColindArrayType& Incolind,
46 const InValsArrayType& Invals,
47 const size_t m,
48 const double thread_chunk,
49 OutRowptrType& Outrowptr,
50 OutColindType& Outcolind,
51 OutValsType& Outvals);
52
53 /***************************** Matrix-Matrix OpenMP Only Kernels *****************************/
54#ifdef HAVE_TPETRA_INST_OPENMP
55 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
56 static inline void mult_A_B_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
57 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
58 const LocalOrdinalViewType & Acol2Brow,
59 const LocalOrdinalViewType & Acol2Irow,
60 const LocalOrdinalViewType & Bcol2Ccol,
61 const LocalOrdinalViewType & Icol2Ccol,
62 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
63 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
64 const std::string& label,
65 const Teuchos::RCP<Teuchos::ParameterList>& params);
66
67 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
68 static inline void mult_A_B_reuse_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
69 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
70 const LocalOrdinalViewType & Acol2Brow,
71 const LocalOrdinalViewType & Acol2Irow,
72 const LocalOrdinalViewType & Bcol2Ccol,
73 const LocalOrdinalViewType & Icol2Ccol,
74 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
75 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
76 const std::string& label,
77 const Teuchos::RCP<Teuchos::ParameterList>& params);
78
79 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
80 static inline void jacobi_A_B_newmatrix_LowThreadGustavsonKernel(Scalar omega,
81 const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
82 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
83 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
84 const LocalOrdinalViewType & Acol2Brow,
85 const LocalOrdinalViewType & Acol2Irow,
86 const LocalOrdinalViewType & Bcol2Ccol,
87 const LocalOrdinalViewType & Icol2Ccol,
88 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
89 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
90 const std::string& label,
91 const Teuchos::RCP<Teuchos::ParameterList>& params);
92
93 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
94 static inline void jacobi_A_B_reuse_LowThreadGustavsonKernel(Scalar omega,
95 const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> & Dinv,
96 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
97 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Bview,
98 const LocalOrdinalViewType & Acol2Brow,
99 const LocalOrdinalViewType & Acol2Irow,
100 const LocalOrdinalViewType & Bcol2Ccol,
101 const LocalOrdinalViewType & Icol2Ccol,
102 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& C,
103 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Cimport,
104 const std::string& label,
105 const Teuchos::RCP<Teuchos::ParameterList>& params);
106#endif
107
108 /***************************** Matrix-Matrix Generic Kernels *****************************/
109 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalOrdinalViewType>
110 static inline void jacobi_A_B_newmatrix_MultiplyScaleAddKernel(Scalar omega,
111 const Vector<Scalar,LocalOrdinal,GlobalOrdinal, Node> & Dinv,
112 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Aview,
113 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Bview,
114 const LocalOrdinalViewType & Acol2rrow,
115 const LocalOrdinalViewType & Acol2Irow,
116 const LocalOrdinalViewType & Bcol2Ccol,
117 const LocalOrdinalViewType & Icol2Ccol,
118 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
119 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > Cimport,
120 const std::string& label,
121 const Teuchos::RCP<Teuchos::ParameterList>& params);
122
123
124 /***************************** Triple Product OpenMP Only Kernels *****************************/
125#ifdef HAVE_TPETRA_INST_OPENMP
126 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class LocalOrdinalViewType>
127 static inline void mult_R_A_P_newmatrix_LowThreadGustavsonKernel(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Rview,
128 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Aview,
129 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Pview,
130 const LocalOrdinalViewType & Acol2Prow,
131 const LocalOrdinalViewType & Acol2PIrow,
132 const LocalOrdinalViewType & Pcol2Accol,
133 const LocalOrdinalViewType & PIcol2Accol,
134 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosOpenMPWrapperNode>& Ac,
135 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Tpetra::KokkosCompat::KokkosOpenMPWrapperNode> > Acimport,
136 const std::string& label = std::string(),
137 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
138#endif
139
140
141 }// ExtraKernels
142}//MatrixMatrix
143}//Tpetra
144
145
146
147#endif
Distributed sparse matrix-matrix multiply and add.
Namespace Tpetra contains the class and methods constituting the Tpetra library.