Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_crsMatrixAssembleElement.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
11#define TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
12
13#include "KokkosSparse_CrsMatrix.hpp"
15#include <type_traits>
16
17namespace Tpetra {
18namespace Details {
19
57template<class SparseMatrixType,
58 class ValsViewType>
59KOKKOS_FUNCTION
60typename SparseMatrixType::ordinal_type
62 const typename SparseMatrixType::ordinal_type lclRow,
63 const typename SparseMatrixType::ordinal_type lclColInds[],
64 const typename SparseMatrixType::ordinal_type sortPerm[],
65 const ValsViewType& vals,
66 const typename SparseMatrixType::ordinal_type numEntInInput,
67 const bool forceAtomic =
68#ifdef KOKKOS_ENABLE_SERIAL
69 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
70#else // NOT KOKKOS_ENABLE_SERIAL
71 false,
72#endif // KOKKOS_ENABLE_SERIAL
73 const bool checkInputIndices = true)
74{
75 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
76 matrix_scalar_type;
77 static_assert (std::is_same<matrix_scalar_type,
78 typename SparseMatrixType::value_type>::value,
79 "The matrix's entries must have a nonconst type.");
80 // static_assert (std::is_assignable<matrix_scalar_type,
81 // typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
82 // "The result of adding a matrix entry and an entry of vals "
83 // "MUST be assignable to a matrix entry.");
84 typedef typename SparseMatrixType::ordinal_type LO;
85 static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
86 "must be a built-in integer type.");
87
88 // If lclRow is NOT a valid row index, this will return a view of
89 // zero entries. If checkInputIndices is true, thus, then none of
90 // the input indices will be valid in that case.
91 auto row_view = A.row (lclRow);
92 const LO numEntInRow = static_cast<LO> (row_view.length);
93 // Number of valid local column indices found, that is, the number
94 // of input indices that are valid column indices found in row
95 // lclRow of the matrix. If not checking, we just return the number
96 // of input indices.
97 LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
98
99 // Since both the matrix row and the input (after permutation) are
100 // sorted, we only need to pass once over the matrix row. 'offset'
101 // tells us the current search position in the matrix row.
102 LO offset = 0;
103 for (LO j = 0; j < numEntInInput; ++j) {
104 const LO perm_index = sortPerm[j];
105 const LO lclColInd = lclColInds[perm_index];
106 // Search linearly in the matrix row for the current index.
107 // If we ever want binary search, this would be the place.
108 while (row_view.colidx(offset) != lclColInd) {
109 ++offset;
110 }
111
112 // If we could make checkInputIndices a compile-time constant,
113 // then the compiler might not need to insert a branch here. This
114 // should help vectorization, if vectorization is possible.
115 if (checkInputIndices) {
116 if (offset != numEntInRow) {
117 // If we could make forceAtomic a compile-time constant, then
118 // the compiler might not need to insert a branch here. This
119 // should help vectorization, if vectorization is possible.
120 if (forceAtomic) {
121 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
122 }
123 else {
124 row_view.value(offset) += vals[perm_index];
125 }
126 ++numValid;
127 }
128 }
129 else { // don't check input indices; assume they are in the row
130 // See above note on forceAtomic.
131 if (forceAtomic) {
132 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
133 }
134 else {
135 row_view.value(offset) += vals[perm_index];
136 }
137 }
138 }
139
140 return numValid;
141}
142
181template<class SparseMatrixType,
182 class ValsViewType>
183KOKKOS_FUNCTION
184typename SparseMatrixType::ordinal_type
186 const typename SparseMatrixType::ordinal_type lclRow,
187 const typename SparseMatrixType::ordinal_type lclColInds[],
188 const typename SparseMatrixType::ordinal_type sortPerm[],
189 const ValsViewType& vals,
190 const typename SparseMatrixType::ordinal_type numEntInInput,
191 const bool forceAtomic =
192#ifdef KOKKOS_ENABLE_SERIAL
193 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
194#else // NOT KOKKOS_ENABLE_SERIAL
195 false,
196#endif // KOKKOS_ENABLE_SERIAL
197 const bool checkInputIndices = true)
198{
199 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
200 matrix_scalar_type;
201 static_assert (std::is_same<matrix_scalar_type,
202 typename SparseMatrixType::value_type>::value,
203 "The matrix's entries must have a nonconst type.");
204 static_assert (std::is_assignable<matrix_scalar_type,
205 typename std::decay< decltype (A.values[0] + vals[0]) >::type>::value,
206 "The result of adding a matrix entry and an entry of vals "
207 "MUST be assignable to a matrix entry.");
208 typedef typename SparseMatrixType::ordinal_type LO;
209 static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
210 "must be a built-in integer type.");
211
212 // If lclRow is NOT a valid row index, this will return a view of
213 // zero entries. If checkInputIndices is true, thus, then none of
214 // the input indices will be valid in that case.
215 auto row_view = A.row (lclRow);
216 const LO numEntInRow = static_cast<LO> (row_view.length);
217 // Number of valid local column indices found, that is, the number
218 // of input indices that are valid column indices found in row
219 // lclRow of the matrix. If not checking, we just return the number
220 // of input indices.
221 LO numValid = checkInputIndices ? static_cast<LO> (0) : numEntInRow;
222
223 // Since both the matrix row and the input (after permutation) are
224 // sorted, we only need to pass once over the matrix row. 'offset'
225 // tells us the current search position in the matrix row.
226 LO offset = 0;
227 for (LO j = 0; j < numEntInInput; ++j) {
228 const LO perm_index = sortPerm[j];
229 const LO lclColInd = lclColInds[perm_index];
230 // Search linearly in the matrix row for the current index.
231 // If we ever want binary search, this would be the place.
232 while (row_view.colidx(offset) != lclColInd) {
233 ++offset;
234 }
235
236 // If checkInputIndices were a compile-time constant, then the
237 // compiler might not need to insert a branch here. This should
238 // help vectorization, if vectorization is possible at all.
239 if (checkInputIndices) {
240 if (offset != numEntInRow) {
241 // If forceAtomic were a compile-time constant, then the
242 // compiler might not need to insert a branch here. This
243 // could help vectorization, if vectorization is possible.
244 if (forceAtomic) {
245 Kokkos::atomic_assign (&(row_view.value(offset)), vals[perm_index]);
246 }
247 else {
248 row_view.value(offset) += vals[perm_index];
249 }
250 ++numValid;
251 }
252 }
253 else { // don't check input indices; assume they are in the row
254 // See above note on forceAtomic.
255 if (forceAtomic) {
256 Kokkos::atomic_add (&(row_view.value(offset)), vals[perm_index]);
257 }
258 else {
259 row_view.value(offset) += vals[perm_index];
260 }
261 }
262 }
263
264 return numValid;
265}
266
318template<class SparseMatrixType,
319 class VectorViewType,
320 class RhsViewType,
321 class LhsViewType>
322KOKKOS_FUNCTION
323typename SparseMatrixType::ordinal_type
324crsMatrixAssembleElement_sortedLinear (const SparseMatrixType& A,
325 const VectorViewType& x,
326 typename SparseMatrixType::ordinal_type lids[],
327 typename SparseMatrixType::ordinal_type sortPerm[],
328 const RhsViewType& rhs,
329 const LhsViewType& lhs,
330 const bool forceAtomic =
331#ifdef KOKKOS_ENABLE_SERIAL
332 ! std::is_same<typename SparseMatrixType::device_type::execution_space, Kokkos::Serial>::type,
333#else // NOT KOKKOS_ENABLE_SERIAL
334 false,
335#endif // KOKKOS_ENABLE_SERIAL
336 const bool checkInputIndices = true)
337{
338 typedef typename std::remove_const<typename SparseMatrixType::value_type>::type
339 matrix_scalar_type;
340 typedef typename std::remove_const<typename VectorViewType::value_type>::type
341 vector_scalar_type;
342 static_assert (std::is_same<matrix_scalar_type,
343 typename SparseMatrixType::value_type>::value,
344 "The sparse output matrix A's entries must have a nonconst type.");
345 static_assert (std::is_same<vector_scalar_type,
346 typename VectorViewType::value_type>::value,
347 "The dense output vector x's entries must have a nonconst type.");
348 // static_assert (std::is_assignable<matrix_scalar_type,
349 // typename std::decay< decltype (A.values[0] + lhs(0,0)) >::type>::value,
350 // "The result of adding a sparse matrix entry and an entry of "
351 // "lhs (the dense element matrix) "
352 // "MUST be assignable to a matrix entry.");
353 // static_assert (std::is_assignable<vector_scalar_type,
354 // typename std::decay< decltype (x[0] + rhs[0]) >::type>::value,
355 // "The result of adding a vector entry and an entry of "
356 // "rhs (the dense element vector) "
357 // "MUST be assignable to a vector entry.");
358 typedef typename SparseMatrixType::ordinal_type LO;
359 static_assert (std::is_integral<LO>::value, "SparseMatrixType::ordinal_type "
360 "must be a built-in integer type.");
361
362 const LO eltDim = rhs.extent (0);
363
364 // Generate sort permutation
365 for (LO i = 0; i < eltDim; ++i) {
366 sortPerm[i] = i;
367 }
368 shellSortKeysAndValues (lids, sortPerm, eltDim);
369
370 LO totalNumValid = 0;
371 for (LO r = 0; r < eltDim; ++r) {
372 const LO lid = lids[r];
373 //auto lhs_r = Kokkos::subview (lhs, sortPerm[r], Kokkos::ALL ());
374 auto lhs_r = Kokkos::subview (lhs, r, Kokkos::ALL ());
375
376 // This assumes that lid is always a valid row in the sparse
377 // matrix, and that the local indices in each row of the matrix
378 // are always sorted.
379 const LO curNumValid =
380 crsMatrixSumIntoValues_sortedSortedLinear (A, lid, lids, sortPerm, lhs_r,
381 eltDim, forceAtomic,
382 checkInputIndices);
383 if (forceAtomic) {
384 Kokkos::atomic_add (&x(lid), rhs(sortPerm[r]));
385 }
386 else {
387 x(lid) += rhs(sortPerm[r]);
388 }
389 totalNumValid += curNumValid;
390 }
391 return totalNumValid;
392}
393
394} // namespace Details
395} // namespace Tpetra
396
397#endif // TPETRA_DETAILS_CRSMATRIXASSEMBLEELEMENT_HPP
Declaration and definition of functions for sorting "short" arrays of keys and corresponding values.
Nonmember function that computes a residual Computes R = B - A * X.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixReplaceValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) = vals[sortPerm[j]], for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixSumIntoValues_sortedSortedLinear(const SparseMatrixType &A, const typename SparseMatrixType::ordinal_type lclRow, const typename SparseMatrixType::ordinal_type lclColInds[], const typename SparseMatrixType::ordinal_type sortPerm[], const ValsViewType &vals, const typename SparseMatrixType::ordinal_type numEntInInput, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lclRow, lclColsInds[sortPerm[j]]) += vals[sortPerm[j]], for all j in 0 .. eltDim-1.
KOKKOS_FUNCTION SparseMatrixType::ordinal_type crsMatrixAssembleElement_sortedLinear(const SparseMatrixType &A, const VectorViewType &x, typename SparseMatrixType::ordinal_type lids[], typename SparseMatrixType::ordinal_type sortPerm[], const RhsViewType &rhs, const LhsViewType &lhs, const bool forceAtomic=false, const bool checkInputIndices=true)
A(lids[j], lids[j]) += lhs(j,j) and x(lids[j]) += rhs(j), for all j in 0 .. eltDim-1.
Namespace Tpetra contains the class and methods constituting the Tpetra library.