Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockCrsMatrix_Helpers_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
11#define TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
12
14
15#include "Tpetra_BlockCrsMatrix.hpp"
16#include "Tpetra_CrsMatrix.hpp"
17#include "Tpetra_HashTable.hpp"
18#include "Tpetra_Import.hpp"
19#include "Tpetra_Map.hpp"
20#include "Tpetra_MultiVector.hpp"
21#include "Teuchos_ParameterList.hpp"
22#include "Teuchos_ScalarTraits.hpp"
23#include <ctime>
24#include <fstream>
25
26namespace Tpetra {
27
28 template<class Scalar, class LO, class GO, class Node>
29 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName) {
30 Teuchos::ParameterList pl;
31 std::ofstream out;
32 out.open(fileName.c_str());
33 blockCrsMatrixWriter(A, out, pl);
34 }
35
36 template<class Scalar, class LO, class GO, class Node>
37 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::string const &fileName, Teuchos::ParameterList const &params) {
38 std::ofstream out;
39 out.open(fileName.c_str());
40 blockCrsMatrixWriter(A, out, params);
41 }
42
43 template<class Scalar, class LO, class GO, class Node>
45 Teuchos::ParameterList pl;
46 blockCrsMatrixWriter(A, os, pl);
47 }
48
49 template<class Scalar, class LO, class GO, class Node>
50 void blockCrsMatrixWriter(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
51
52 using Teuchos::RCP;
53 using Teuchos::rcp;
54
55 typedef Teuchos::OrdinalTraits<GO> TOT;
56 typedef BlockCrsMatrix<Scalar, LO, GO, Node> block_crs_matrix_type;
61
62 RCP<const map_type> const rowMap = A.getRowMap(); //"mesh" map
63 RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
64 const int myRank = comm->getRank();
65 const size_t numProcs = comm->getSize();
66
67 // If true, force use of the import strip-mining infrastructure. This is useful for debugging on one process.
68 bool alwaysUseParallelAlgorithm = false;
69 if (params.isParameter("always use parallel algorithm"))
70 alwaysUseParallelAlgorithm = params.get<bool>("always use parallel algorithm");
71 bool printMatrixMarketHeader = true;
72 if (params.isParameter("print MatrixMarket header"))
73 printMatrixMarketHeader = params.get<bool>("print MatrixMarket header");
74
75 if (printMatrixMarketHeader && myRank==0) {
76 std::time_t now = std::time(NULL);
77
78 const std::string dataTypeStr =
79 Teuchos::ScalarTraits<Scalar>::isComplex ? "complex" : "real";
80
81 // Explanation of first line of file:
82 // - "%%MatrixMarket" is the tag for Matrix Market format.
83 // - "matrix" is what we're printing.
84 // - "coordinate" means sparse (triplet format), rather than dense.
85 // - "real" / "complex" is the type (in an output format sense,
86 // not in a C++ sense) of each value of the matrix. (The
87 // other two possibilities are "integer" (not really necessary
88 // here) and "pattern" (no values, just graph).)
89 os << "%%MatrixMarket matrix coordinate " << dataTypeStr << " general" << std::endl;
90 os << "% time stamp: " << ctime(&now);
91 os << "% written from " << numProcs << " processes" << std::endl;
92 os << "% point representation of Tpetra::BlockCrsMatrix" << std::endl;
93 size_t numRows = A.getGlobalNumRows();
94 size_t numCols = A.getGlobalNumCols();
95 os << "% " << numRows << " block rows, " << numCols << " block columns" << std::endl;
96 const LO blockSize = A.getBlockSize();
97 os << "% block size " << blockSize << std::endl;
98 os << numRows*blockSize << " " << numCols*blockSize << " " << A.getGlobalNumEntries()*blockSize*blockSize << std::endl;
99 }
100
101 if (numProcs==1 && !alwaysUseParallelAlgorithm) {
102 writeMatrixStrip(A,os,params);
103 } else {
104 size_t numRows = rowMap->getLocalNumElements();
105
106 //Create source map
107 RCP<const map_type> allMeshGidsMap = rcp(new map_type(TOT::invalid(), numRows, A.getIndexBase(), comm));
108 //Create and populate vector of mesh GIDs corresponding to this pid's rows.
109 //This vector will be imported one pid's worth of information at a time to pid 0.
110 mv_type allMeshGids(allMeshGidsMap,1);
111 Teuchos::ArrayRCP<GO> allMeshGidsData = allMeshGids.getDataNonConst(0);
112
113 for (size_t i=0; i<numRows; i++)
114 allMeshGidsData[i] = rowMap->getGlobalElement(i);
115 allMeshGidsData = Teuchos::null;
116
117 // Now construct a RowMatrix on PE 0 by strip-mining the rows of the input matrix A.
118 size_t stripSize = allMeshGids.getGlobalLength() / numProcs;
119 size_t remainder = allMeshGids.getGlobalLength() % numProcs;
120 size_t curStart = 0;
121 size_t curStripSize = 0;
122 Teuchos::Array<GO> importMeshGidList;
123 for (size_t i=0; i<numProcs; i++) {
124 if (myRank==0) { // Only PE 0 does this part
125 curStripSize = stripSize;
126 if (i<remainder) curStripSize++; // handle leftovers
127 importMeshGidList.resize(curStripSize); // Set size of vector to max needed
128 for (size_t j=0; j<curStripSize; j++) importMeshGidList[j] = j + curStart + A.getIndexBase();
129 curStart += curStripSize;
130 }
131 // The following import map should be non-trivial only on PE 0.
132 TEUCHOS_TEST_FOR_EXCEPTION(myRank>0 && curStripSize!=0,
133 std::runtime_error, "Tpetra::blockCrsMatrixWriter: (pid "
134 << myRank << ") map size should be zero, but is " << curStripSize);
135 RCP<map_type> importMeshGidMap = rcp(new map_type(TOT::invalid(), importMeshGidList(), A.getIndexBase(), comm));
136 import_type gidImporter(allMeshGidsMap, importMeshGidMap);
137 mv_type importMeshGids(importMeshGidMap, 1);
138 importMeshGids.doImport(allMeshGids, gidImporter, INSERT);
139
140 // importMeshGids now has a list of GIDs for the current strip of matrix rows.
141 // Use these values to build another importer that will get rows of the matrix.
142
143 // The following import map will be non-trivial only on PE 0.
144 Teuchos::ArrayRCP<const GO> importMeshGidsData = importMeshGids.getData(0);
145 Teuchos::Array<GO> importMeshGidsGO;
146 importMeshGidsGO.reserve(importMeshGidsData.size());
147 for (typename Teuchos::ArrayRCP<const GO>::size_type j=0; j<importMeshGidsData.size(); ++j)
148 importMeshGidsGO.push_back(importMeshGidsData[j]);
149 RCP<const map_type> importMap = rcp(new map_type(TOT::invalid(), importMeshGidsGO(), rowMap->getIndexBase(), comm) );
150
151 import_type importer(rowMap,importMap );
152 size_t numEntriesPerRow = A.getCrsGraph().getGlobalMaxNumRowEntries();
153 RCP<crs_graph_type> graph = createCrsGraph(importMap,numEntriesPerRow);
154 RCP<const map_type> domainMap = A.getCrsGraph().getDomainMap();
155 graph->doImport(A.getCrsGraph(), importer, INSERT);
156 graph->fillComplete(domainMap, importMap);
157
158 block_crs_matrix_type importA(*graph, A.getBlockSize());
159 importA.doImport(A, importer, INSERT);
160
161 // Finally we are ready to write this strip of the matrix
162 writeMatrixStrip(importA, os, params);
163 }
164 }
165 }
166
167 template<class Scalar, class LO, class GO, class Node>
168 void writeMatrixStrip(BlockCrsMatrix<Scalar,LO,GO,Node> const &A, std::ostream &os, Teuchos::ParameterList const &params) {
169 using Teuchos::RCP;
171 using bcrs_type = BlockCrsMatrix<Scalar,LO,GO,Node>;
172 using bcrs_local_inds_host_view_type = typename bcrs_type::local_inds_host_view_type;
173 using bcrs_values_host_view_type = typename bcrs_type::values_host_view_type;
174 using impl_scalar_type = typename bcrs_type::impl_scalar_type;
175
176 size_t numRows = A.getGlobalNumRows();
177 RCP<const map_type> rowMap = A.getRowMap();
178 RCP<const map_type> colMap = A.getColMap();
179 RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
180 const int myRank = comm->getRank();
181
182 const size_t meshRowOffset = rowMap->getIndexBase();
183 const size_t meshColOffset = colMap->getIndexBase();
184 TEUCHOS_TEST_FOR_EXCEPTION(meshRowOffset != meshColOffset,
185 std::runtime_error, "Tpetra::writeMatrixStrip: "
186 "mesh row index base != mesh column index base");
187
188 if (myRank !=0) {
189
190 TEUCHOS_TEST_FOR_EXCEPTION(A.getLocalNumRows() != 0,
191 std::runtime_error, "Tpetra::writeMatrixStrip: pid "
192 << myRank << " should have 0 rows but has " << A.getLocalNumRows());
193 TEUCHOS_TEST_FOR_EXCEPTION(A.getLocalNumCols() != 0,
194 std::runtime_error, "Tpetra::writeMatrixStrip: pid "
195 << myRank << " should have 0 columns but has " << A.getLocalNumCols());
196
197 } else {
198
199 TEUCHOS_TEST_FOR_EXCEPTION(numRows != A.getLocalNumRows(),
200 std::runtime_error, "Tpetra::writeMatrixStrip: "
201 "number of rows on pid 0 does not match global number of rows");
202
203
204 int err = 0;
205 const LO blockSize = A.getBlockSize();
206 const size_t numLocalRows = A.getLocalNumRows();
207 bool precisionChanged=false;
208 int oldPrecision = 0; // avoid "unused variable" warning
209 if (params.isParameter("precision")) {
210 oldPrecision = os.precision(params.get<int>("precision"));
211 precisionChanged=true;
212 }
213 int pointOffset = 1;
214 if (params.isParameter("zero-based indexing")) {
215 if (params.get<bool>("zero-based indexing") == true)
216 pointOffset = 0;
217 }
218
219 size_t localRowInd;
220 for (localRowInd = 0; localRowInd < numLocalRows; ++localRowInd) {
221
222 // Get a view of the current row.
223 bcrs_local_inds_host_view_type localColInds;
224 bcrs_values_host_view_type vals;
225 LO numEntries;
226 A.getLocalRowView (localRowInd, localColInds, vals); numEntries = localColInds.extent(0);
227 GO globalMeshRowID = rowMap->getGlobalElement(localRowInd) - meshRowOffset;
228
229 for (LO k = 0; k < numEntries; ++k) {
230 GO globalMeshColID = colMap->getGlobalElement(localColInds[k]) - meshColOffset;
231 const impl_scalar_type* curBlock = vals.data() + blockSize * blockSize * k;
232 // Blocks are stored in row-major format.
233 for (LO j = 0; j < blockSize; ++j) {
234 GO globalPointRowID = globalMeshRowID * blockSize + j + pointOffset;
235 for (LO i = 0; i < blockSize; ++i) {
236 GO globalPointColID = globalMeshColID * blockSize + i + pointOffset;
237 const impl_scalar_type curVal = curBlock[i + j * blockSize];
238
239 os << globalPointRowID << " " << globalPointColID << " ";
240 if (Teuchos::ScalarTraits<impl_scalar_type>::isComplex) {
241 // Matrix Market format wants complex values to be
242 // written as space-delimited pairs. See Bug 6469.
243 os << Teuchos::ScalarTraits<impl_scalar_type>::real (curVal) << " "
244 << Teuchos::ScalarTraits<impl_scalar_type>::imag (curVal);
245 }
246 else {
247 os << curVal;
248 }
249 os << std::endl;
250 }
251 }
252 }
253 }
254 if (precisionChanged)
255 os.precision(oldPrecision);
256 TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
257 std::runtime_error, "Tpetra::writeMatrixStrip: "
258 "error getting view of local row " << localRowInd);
259
260 }
261
262 }
263
264 template<class Scalar, class LO, class GO, class Node>
265 Teuchos::RCP<Tpetra::CrsGraph<LO, GO, Node> >
266 getBlockCrsGraph(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_LID)
267 {
268 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph", getBlockCrsGraph0);
269 /*
270 ASSUMPTIONS:
271
272 1) In point matrix, all entries associated with a little block are present (even if they are zero).
273 2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
274 3) Point column map and block column map are ordered consistently.
275 */
276
277 using Teuchos::Array;
278 using Teuchos::ArrayView;
279 using Teuchos::RCP;
280
283 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
284
285 using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
286 using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
287 using entries_type = typename local_graph_device_type::entries_type::non_const_type;
288
289 using offset_type = typename row_map_type::non_const_value_type;
290
291 using execution_space = typename Node::execution_space;
292 using range_type = Kokkos::RangePolicy<execution_space, LO>;
293
294 const map_type &pointRowMap = *(pointMatrix.getRowMap());
295 RCP<const map_type> meshRowMap, meshColMap, meshDomainMap, meshRangeMap;
296
297 const map_type &pointColMap = *(pointMatrix.getColMap());
298 const map_type &pointDomainMap = *(pointMatrix.getDomainMap());
299 const map_type &pointRangeMap = *(pointMatrix.getRangeMap());
300
301 {
302 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::createMeshMaps", getBlockCrsGraph1);
303 meshRowMap = createMeshMap<LO,GO,Node>(blockSize, pointRowMap, use_LID);
304 meshColMap = createMeshMap<LO,GO,Node>(blockSize, pointColMap, use_LID);
305 meshDomainMap = createMeshMap<LO,GO,Node>(blockSize, pointDomainMap, use_LID);
306 meshRangeMap = createMeshMap<LO,GO,Node>(blockSize, pointRangeMap, use_LID);
307 Kokkos::DefaultExecutionSpace().fence();
308 }
309
310 if(meshColMap.is_null()) throw std::runtime_error("ERROR: Cannot create mesh colmap");
311
312 auto localMeshColMap = meshColMap->getLocalMap();
313 auto localPointColMap = pointColMap.getLocalMap();
314 auto localPointRowMap = pointRowMap.getLocalMap();
315
316 RCP<crs_graph_type> meshCrsGraph;
317
318 const offset_type bs2 = blockSize * blockSize;
319
320 if (use_LID) {
321 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::LID", getBlockCrsGraph2);
322 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
323 auto pointRowptr = pointLocalGraph.row_map;
324 auto pointColind = pointLocalGraph.entries;
325
326 TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0,
327 std::runtime_error, "Tpetra::getBlockCrsGraph: "
328 "local number of non zero entries is not a multiple of blockSize^2 ");
329
330 LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
331 row_map_type blockRowptr("blockRowptr", block_rows+1);
332 entries_type blockColind("blockColind", pointColind.extent(0)/(bs2));
333
334 Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {
335
336 const LO offset_b = pointRowptr(i*blockSize)/bs2;
337 const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2;
338
339 if (i==block_rows-1)
340 blockRowptr(i+1) = offset_b_max;
341 blockRowptr(i) = offset_b;
342
343 const LO offset_p = pointRowptr(i*blockSize);
344
345 for (LO k=0; k<offset_b_max-offset_b; ++k) {
346 blockColind(offset_b + k) = pointColind(offset_p + k * blockSize)/blockSize;
347 }
348 });
349
350 meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap, blockRowptr, blockColind));
351 meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);
352 Kokkos::DefaultExecutionSpace().fence();
353 }
354 else {
355 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::getBlockCrsGraph::GID", getBlockCrsGraph3);
356 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
357 auto pointRowptr = pointLocalGraph.row_map;
358 auto pointColind = pointLocalGraph.entries;
359
360 TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0,
361 std::runtime_error, "Tpetra::getBlockCrsGraph: "
362 "local number of non zero entries is not a multiple of blockSize^2 ");
363
364 LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
365 row_map_type blockRowptr("blockRowptr", block_rows+1);
366 entries_type blockColind("blockColind", pointColind.extent(0)/(bs2));
367
368 Kokkos::parallel_for("fillMesh",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {
369
370 const LO offset_b = pointRowptr(i*blockSize)/bs2;
371 const LO offset_b_max = pointRowptr((i+1)*blockSize)/bs2;
372
373 if (i==block_rows-1)
374 blockRowptr(i+1) = offset_b_max;
375 blockRowptr(i) = offset_b;
376
377 const LO offset_p = pointRowptr(i*blockSize);
378 const LO offset_p_max = pointRowptr((i+1)*blockSize);
379
380 LO filled_block = 0;
381 for (LO p_i=0; p_i<offset_p_max-offset_p; ++p_i) {
382 auto bcol_GID = localPointColMap.getGlobalElement(pointColind(offset_p + p_i))/blockSize;
383 auto bcol_LID = localMeshColMap.getLocalElement(bcol_GID);
384
385 bool visited = false;
386 for (LO k=0; k<filled_block; ++k) {
387 if (blockColind(offset_b + k) == bcol_LID)
388 visited = true;
389 }
390 if (!visited) {
391 blockColind(offset_b + filled_block) = bcol_LID;
392 ++filled_block;
393 }
394 }
395 });
396
397 meshCrsGraph = rcp(new crs_graph_type(meshRowMap, meshColMap, blockRowptr, blockColind));
398 meshCrsGraph->fillComplete(meshDomainMap,meshRangeMap);
399 Kokkos::DefaultExecutionSpace().fence();
400 }
401
402 return meshCrsGraph;
403 }
404
405 template<class Scalar, class LO, class GO, class Node>
406 Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node> >
407 convertToBlockCrsMatrix(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize, bool use_LID)
408 {
409 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix", convertToBlockCrsMatrix0);
410 /*
411 ASSUMPTIONS:
412
413 1) In point matrix, all entries associated with a little block are present (even if they are zero).
414 2) For given mesh DOF, point DOFs appear consecutively and in ascending order in row & column maps.
415 3) Point column map and block column map are ordered consistently.
416 */
417
418 using Teuchos::Array;
419 using Teuchos::ArrayView;
420 using Teuchos::RCP;
421
422 typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
423 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
424
425 using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
426 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
427 using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
428 using values_type = typename local_matrix_device_type::values_type::non_const_type;
429
430 using offset_type = typename row_map_type::non_const_value_type;
431
432 using execution_space = typename Node::execution_space;
433 using range_type = Kokkos::RangePolicy<execution_space, LO>;
434
435 RCP<block_crs_matrix_type> blockMatrix;
436
437 const offset_type bs2 = blockSize * blockSize;
438
439 auto meshCrsGraph = getBlockCrsGraph(pointMatrix, blockSize, use_LID);
440
441 if (use_LID) {
442 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix::LID", convertToBlockCrsMatrix1);
443 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
444 auto pointRowptr = pointLocalGraph.row_map;
445 auto pointColind = pointLocalGraph.entries;
446
447 offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
448 values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
449 auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
450 auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
451
452 Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
453 const offset_type blkBeg = blockRowptr[i];
454 const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
455
456 // For each block in the row...
457 for (offset_type block=0; block < numBlocks; block++) {
458
459 // For each entry in the block...
460 for(LO little_row=0; little_row<blockSize; little_row++) {
461 offset_type point_row_offset = pointRowptr[i*blockSize + little_row];
462 for(LO little_col=0; little_col<blockSize; little_col++) {
463 blockValues((blkBeg+block) * bs2 + little_row * blockSize + little_col) =
464 pointValues[point_row_offset + block*blockSize + little_col];
465 }
466 }
467
468 }
469 });
470 blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
471 Kokkos::DefaultExecutionSpace().fence();
472 }
473 else {
474 TEUCHOS_FUNC_TIME_MONITOR_DIFF("Tpetra::convertToBlockCrsMatrix::GID", convertToBlockCrsMatrix2);
475 auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
476 auto localPointColMap = pointMatrix.getColMap()->getLocalMap();
477
478 values_type blockValues("values", meshCrsGraph->getLocalNumEntries()*bs2);
479 {
480 auto pointLocalGraph = pointMatrix.getCrsGraph()->getLocalGraphDevice();
481 auto pointRowptr = pointLocalGraph.row_map;
482 auto pointColind = pointLocalGraph.entries;
483
484 offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0)-1)/blockSize;
485 auto pointValues = pointMatrix.getLocalValuesDevice (Access::ReadOnly);
486 auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
487 auto blockColind = meshCrsGraph->getLocalGraphDevice().entries;
488
489 row_map_type pointGColind("pointGColind", pointColind.extent(0));
490
491 Kokkos::parallel_for("computePointGColind",range_type(0,pointColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
492 pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i));
493 });
494
495 row_map_type blockGColind("blockGColind", blockColind.extent(0));
496
497 Kokkos::parallel_for("computeBlockGColind",range_type(0,blockGColind.extent(0)),KOKKOS_LAMBDA(const LO i) {
498 blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i));
499 });
500
501 Kokkos::parallel_for("copyblockValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
502 const offset_type blkBeg = blockRowptr[i];
503 const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
504
505 for (offset_type point_i=0; point_i < pointRowptr[i*blockSize + 1] - pointRowptr[i*blockSize]; point_i++) {
506
507 offset_type block_inv=static_cast<offset_type>(-1);
508 offset_type little_col_inv=static_cast<offset_type>(-1);
509 for (offset_type block_2=0; block_2 < numBlocks; block_2++) {
510 for (LO little_col_2=0; little_col_2 < blockSize; little_col_2++) {
511 if (blockGColind(blkBeg+block_2)*blockSize + little_col_2 == pointGColind(pointRowptr[i*blockSize] + point_i)) {
512 block_inv = block_2;
513 little_col_inv = little_col_2;
514 break;
515 }
516 }
517 if (block_inv!=static_cast<offset_type>(-1))
518 break;
519 }
520
521 for(LO little_row=0; little_row<blockSize; little_row++) {
522 blockValues((blkBeg+block_inv) * bs2 + little_row * blockSize + little_col_inv) = pointValues[pointRowptr[i*blockSize+little_row] + point_i];
523 }
524 }
525 });
526 }
527 blockMatrix = rcp(new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
528 Kokkos::DefaultExecutionSpace().fence();
529 }
530
531 return blockMatrix;
532
533 }
534
535 template<class Scalar, class LO, class GO, class Node>
536 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LO, GO, Node>>
537 fillLogicalBlocks(const Tpetra::CrsMatrix<Scalar, LO, GO, Node>& pointMatrix, const LO &blockSize)
538 {
540 using execution_space = typename Node::execution_space;
541 using dev_row_view_t = typename crs_t::local_graph_device_type::row_map_type::non_const_type;
542 using dev_col_view_t = typename crs_t::local_graph_device_type::entries_type::non_const_type;
543 using dev_val_view_t = typename crs_t::local_matrix_device_type::values_type::non_const_type;
544 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
545 using team_policy = Kokkos::TeamPolicy<execution_space>;
546 using member_type = typename team_policy::member_type;
547 using scratch_view = Kokkos::View<bool*, typename execution_space::scratch_memory_space>;
548 using Ordinal = typename dev_row_view_t::non_const_value_type;
549
550 // Get structure / values
551 auto local = pointMatrix.getLocalMatrixDevice();
552 auto row_ptrs = local.graph.row_map;
553 auto col_inds = local.graph.entries;
554 auto values = local.values;
555 const auto nrows = pointMatrix.getLocalNumRows();
556
557 //
558 // Populate all active blocks, they must be fully populated with entries so fill with zeroes
559 //
560
561 // Make row workspace views
562 dev_row_view_t new_rowmap("new_rowmap", nrows+1);
563 const auto blocks_per_row = nrows / blockSize; // assumes square matrix
564 dev_row_view_t active_block_row_map("active_block_row_map", blocks_per_row + 1);
565 const int max_threads = execution_space::concurrency();
566 assert(blockSize > 1);
567 assert(nrows % blockSize == 0);
568 const int mem_level = 1;
569 const int bytes = scratch_view::shmem_size(blocks_per_row);
570
571 if (max_threads >= blockSize) {
572 // Prefer 1 team per block since this will require a lot less scratch memory
573 team_policy tp(blocks_per_row, blockSize);
574
575 // Count active blocks
576 Kokkos::parallel_for("countActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
577 Ordinal block_row = team.league_rank();
578
579 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
580 Kokkos::single(
581 Kokkos::PerTeam(team), [&] () {
582 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
583 row_block_active(row_block_idx) = false;
584 }
585 });
586 team.team_barrier();
587
588 // All threads in a team scan a blocks-worth of rows to see which
589 // blocks are active
590 Kokkos::parallel_for(
591 Kokkos::TeamThreadRange(team, blockSize), [&] (Ordinal block_offset) {
592
593 Ordinal row = block_row*blockSize + block_offset;
594 Ordinal row_itr = row_ptrs(row);
595 Ordinal row_end = row_ptrs(row+1);
596
597 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
598 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
599 const Ordinal first_possible_col_in_next_block = (row_block_idx+1) * blockSize;
600 Ordinal curr_nnz_col = col_inds(row_itr);
601 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
602 // This block has at least one nnz in this row
603 row_block_active(row_block_idx) = true;
604 ++row_itr;
605 if (row_itr == row_end) break;
606 curr_nnz_col = col_inds(row_itr);
607 }
608 }
609 });
610
611 team.team_barrier();
612
613 Kokkos::single(
614 Kokkos::PerTeam(team), [&] () {
615 Ordinal count = 0;
616 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
617 if (row_block_active(row_block_idx)) {
618 ++count;
619 }
620 }
621 active_block_row_map(block_row) = count;
622 });
623 });
624 }
625 else {
626 // We don't have enough parallelism to make a thread team handle a block, so just
627 // have 1 thread per block
628 team_policy tp(blocks_per_row, 1);
629
630 // Count active blocks
631 Kokkos::parallel_for("countActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
632 Ordinal block_row = team.league_rank();
633
634 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
635 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
636 row_block_active(row_block_idx) = false;
637 }
638
639 // One thread scans a blocks-worth of rows to see which blocks are active
640 for (int block_offset=0; block_offset < blockSize; ++block_offset) {
641 Ordinal row = block_row*blockSize + block_offset;
642 Ordinal row_itr = row_ptrs(row);
643 Ordinal row_end = row_ptrs(row+1);
644
645 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
646 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
647 const Ordinal first_possible_col_in_next_block = (row_block_idx+1) * blockSize;
648 Ordinal curr_nnz_col = col_inds(row_itr);
649 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
650 // This block has at least one nnz in this row
651 row_block_active(row_block_idx) = true;
652 ++row_itr;
653 if (row_itr == row_end) break;
654 curr_nnz_col = col_inds(row_itr);
655 }
656 }
657 }
658
659 Ordinal count = 0;
660 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
661 if (row_block_active(row_block_idx)) {
662 ++count;
663 }
664 }
665 active_block_row_map(block_row) = count;
666 });
667 }
668
669 Ordinal nnz_block_count = 0;
670#if KOKKOSKERNELS_VERSION >= 40199
671 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
672 execution_space>(active_block_row_map.extent(0), active_block_row_map, nnz_block_count);
673#else
674 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
675 dev_row_view_t, execution_space>(active_block_row_map.extent(0), active_block_row_map, nnz_block_count);
676#endif
677 dev_col_view_t block_col_ids("block_col_ids", nnz_block_count);
678
679 // Find active blocks
680 if (max_threads >= blockSize) {
681 // Prefer 1 team per block since this will require a lot less scratch memory
682 team_policy tp(blocks_per_row, blockSize);
683
684 Kokkos::parallel_for("findActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
685 Ordinal block_row = team.league_rank();
686
687 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
688 Kokkos::single(
689 Kokkos::PerTeam(team), [&] () {
690 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
691 row_block_active(row_block_idx) = false;
692 }
693 });
694 team.team_barrier();
695
696 // All threads in a team scan a blocks-worth of rows to see which
697 // blocks are active
698 Kokkos::parallel_for(
699 Kokkos::TeamThreadRange(team, blockSize), [&] (Ordinal block_offset) {
700
701 Ordinal row = block_row*blockSize + block_offset;
702 Ordinal row_itr = row_ptrs(row);
703 Ordinal row_end = row_ptrs(row+1);
704
705 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
706 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
707 const Ordinal first_possible_col_in_next_block = (row_block_idx+1) * blockSize;
708 Ordinal curr_nnz_col = col_inds(row_itr);
709 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
710 // This block has at least one nnz in this row
711 row_block_active(row_block_idx) = true;
712 ++row_itr;
713 if (row_itr == row_end) break;
714 curr_nnz_col = col_inds(row_itr);
715 }
716 }
717 });
718
719 team.team_barrier();
720
721 Kokkos::single(
722 Kokkos::PerTeam(team), [&] () {
723 Ordinal offset = active_block_row_map[block_row];
724 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
725 if (row_block_active(row_block_idx)) {
726 block_col_ids(offset) = row_block_idx;
727 ++offset;
728 }
729 }
730 });
731 });
732 }
733 else {
734 team_policy tp(blocks_per_row, 1);
735
736 Kokkos::parallel_for("findActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(const member_type& team) {
737 Ordinal block_row = team.league_rank();
738
739 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
740 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
741 row_block_active(row_block_idx) = false;
742 }
743
744 // One thread scans a blocks-worth of rows to see which blocks are active
745 for (int block_offset=0; block_offset < blockSize; ++block_offset) {
746 Ordinal row = block_row*blockSize + block_offset;
747 Ordinal row_itr = row_ptrs(row);
748 Ordinal row_end = row_ptrs(row+1);
749
750 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
751 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
752 const Ordinal first_possible_col_in_next_block = (row_block_idx+1) * blockSize;
753 Ordinal curr_nnz_col = col_inds(row_itr);
754 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
755 // This block has at least one nnz in this row
756 row_block_active(row_block_idx) = true;
757 ++row_itr;
758 if (row_itr == row_end) break;
759 curr_nnz_col = col_inds(row_itr);
760 }
761 }
762 }
763
764 Ordinal offset = active_block_row_map[block_row];
765 for (size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
766 if (row_block_active(row_block_idx)) {
767 block_col_ids(offset) = row_block_idx;
768 ++offset;
769 }
770 }
771 });
772 }
773
774 // Sizing
775 Kokkos::parallel_for("sizing", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
776 const auto block_row = row / blockSize;
777 const Ordinal block_row_begin = active_block_row_map(block_row);
778 const Ordinal block_row_end = active_block_row_map(block_row+1);
779 const Ordinal row_nnz = (block_row_end - block_row_begin) * blockSize;
780 new_rowmap(row) = row_nnz;
781 });
782
783 Ordinal new_nnz_count = 0;
784#if KOKKOSKERNELS_VERSION >= 40199
785 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
786 execution_space>(new_rowmap.extent(0), new_rowmap, new_nnz_count);
787#else
788 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
789 dev_row_view_t, execution_space>(new_rowmap.extent(0), new_rowmap, new_nnz_count);
790#endif
791 // Now populate cols and vals
792 dev_col_view_t new_col_ids("new_col_ids", new_nnz_count);
793 dev_val_view_t new_vals("new_vals", new_nnz_count);
794 Kokkos::parallel_for("entries", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
795 Ordinal row_itr = row_ptrs(row);
796 Ordinal row_end = row_ptrs(row+1);
797 Ordinal row_itr_new = new_rowmap(row);
798
799 Ordinal block_row = row / blockSize;
800 Ordinal block_row_begin = active_block_row_map(block_row);
801 Ordinal block_row_end = active_block_row_map(block_row+1);
802
803 for (Ordinal row_block_idx = block_row_begin; row_block_idx < block_row_end; ++row_block_idx) {
804 const Ordinal block_col = block_col_ids(row_block_idx);
805 const Ordinal first_possible_col_in_block = block_col * blockSize;
806 const Ordinal first_possible_col_in_next_block = (block_col+1) * blockSize;
807 for (Ordinal possible_col = first_possible_col_in_block; possible_col < first_possible_col_in_next_block; ++possible_col, ++row_itr_new) {
808 new_col_ids(row_itr_new) = possible_col;
809 Ordinal curr_nnz_col = col_inds(row_itr);
810 if (possible_col == curr_nnz_col && row_itr < row_end) {
811 // Already a non-zero entry
812 new_vals(row_itr_new) = values(row_itr);
813 ++row_itr;
814 }
815 }
816 }
817 });
818
819 // Create new, filled CRS
820 auto crs_row_map = pointMatrix.getRowMap();
821 auto crs_col_map = pointMatrix.getColMap();
822 Teuchos::RCP<crs_t> result = Teuchos::rcp(new crs_t(crs_row_map, crs_col_map, new_rowmap, new_col_ids, new_vals));
823 result->fillComplete();
824 return result;
825 }
826
827 template<class Scalar, class LO, class GO, class Node>
828 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LO, GO, Node>>
830 {
832 using dev_row_view_t = typename crs_t::local_graph_device_type::row_map_type::non_const_type;
833 using dev_col_view_t = typename crs_t::local_graph_device_type::entries_type::non_const_type;
834 using dev_val_view_t = typename crs_t::local_matrix_device_type::values_type::non_const_type;
835 using impl_scalar_t = typename Kokkos::ArithTraits<Scalar>::val_type;
836 using STS = Kokkos::ArithTraits<impl_scalar_t>;
837 using Ordinal = typename dev_row_view_t::non_const_value_type;
838 using execution_space = typename Node::execution_space;
839 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
840
841 // Get structure / values
842 auto local = pointMatrix.getLocalMatrixHost();
843 auto row_ptrs = local.graph.row_map;
844 auto col_inds = local.graph.entries;
845 auto values = local.values;
846 const auto nrows = pointMatrix.getLocalNumRows();
847
848 // Sizing and rows
849 dev_row_view_t new_rowmap("new_rowmap", nrows+1);
850 const impl_scalar_t zero = STS::zero();
851 Kokkos::parallel_for("sizing", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
852 const Ordinal row_nnz_begin = row_ptrs(row);
853 Ordinal row_nnzs = 0;
854 for (Ordinal row_nnz = row_nnz_begin; row_nnz < row_ptrs(row+1); ++row_nnz) {
855 const impl_scalar_t value = values(row_nnz);
856 if (value != zero) {
857 ++row_nnzs;
858 }
859 }
860 new_rowmap(row) = row_nnzs;
861 });
862
863 Ordinal real_nnzs = 0;
864#if KOKKOSKERNELS_VERSION >= 40199
865 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
866 execution_space>(new_rowmap.extent(0), new_rowmap, real_nnzs);
867#else
868 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
869 dev_row_view_t, execution_space>(new_rowmap.extent(0), new_rowmap, real_nnzs);
870#endif
871 // Now populate cols and vals
872 dev_col_view_t new_col_ids("new_col_ids", real_nnzs);
873 dev_val_view_t new_vals("new_vals", real_nnzs);
874 Kokkos::parallel_for("entries", range_type(0, nrows), KOKKOS_LAMBDA(const size_t row) {
875 Ordinal row_nnzs = 0;
876 const Ordinal old_row_nnz_begin = row_ptrs(row);
877 const Ordinal new_row_nnz_begin = new_rowmap(row);
878 for (Ordinal old_row_nnz = old_row_nnz_begin; old_row_nnz < row_ptrs(row+1); ++old_row_nnz) {
879 const impl_scalar_t value = values(old_row_nnz);
880 if (value != zero) {
881 new_col_ids(new_row_nnz_begin + row_nnzs) = col_inds(old_row_nnz);
882 new_vals(new_row_nnz_begin + row_nnzs) = value;
883 ++row_nnzs;
884 }
885 }
886 });
887
888 // Create new, unfilled CRS
889 auto crs_row_map = pointMatrix.getRowMap();
890 auto crs_col_map = pointMatrix.getColMap();
891 Teuchos::RCP<crs_t> result = Teuchos::rcp(new crs_t(crs_row_map, crs_col_map, new_rowmap, new_col_ids, new_vals));
892 result->fillComplete();
893 return result;
894 }
895
896 template<class LO, class GO, class Node>
897 Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
898 createMeshMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& pointMap, bool use_LID)
899 {
900 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
902
903 if(use_LID) {
904
905 using execution_space = typename Node::execution_space;
906 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
907
908 auto pointGlobalID = pointMap.getMyGlobalIndicesDevice();
909 LO block_rows = pointGlobalID.extent(0)/blockSize;
910 Kokkos::View<GO*, typename map_type::device_type> meshGlobalID("meshGlobalID", block_rows);
911
912 Kokkos::parallel_for("fillMeshMap",range_type(0,block_rows), KOKKOS_LAMBDA(const LO i) {
913 meshGlobalID(i) = pointGlobalID(i*blockSize)/blockSize;
914 });
915
916 Teuchos::RCP<const map_type> meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGlobalID, 0, pointMap.getComm()) );
917 return meshMap;
918 }
919 else {
920 //calculate mesh GIDs
921 Teuchos::ArrayView<const GO> pointGids = pointMap.getLocalElementList();
922 Teuchos::Array<GO> meshGids;
923 GO indexBase = pointMap.getIndexBase();
924
925 // Use hash table to track whether we've encountered this GID previously. This will happen
926 // when striding through the point DOFs in a block. It should not happen otherwise.
927 // I don't use sort/make unique because I don't want to change the ordering.
928 meshGids.reserve(pointGids.size());
929 Tpetra::Details::HashTable<GO,int> hashTable(pointGids.size());
930 for (int i=0; i<pointGids.size(); ++i) {
931 GO meshGid = (pointGids[i]-indexBase) / blockSize + indexBase;
932 if (hashTable.get(meshGid) == -1) {
933 hashTable.add(meshGid,1); //(key,value)
934 meshGids.push_back(meshGid);
935 }
936 }
937
938 Teuchos::RCP<const map_type> meshMap = Teuchos::rcp( new map_type(TOT::invalid(), meshGids(), 0, pointMap.getComm()) );
939 return meshMap;
940 }
941 }
942
943
944 template<class LO, class GO, class Node>
945 Teuchos::RCP<const Tpetra::Map<LO,GO,Node> >
946 createPointMap (const LO& blockSize, const Tpetra::Map<LO,GO,Node>& blockMap)
947 {
948 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
950
951 //calculate mesh GIDs
952 Teuchos::ArrayView<const GO> blockGids = blockMap.getLocalElementList();
953 Teuchos::Array<GO> pointGids(blockGids.size() * blockSize);
954 GO indexBase = blockMap.getIndexBase();
955
956 for(LO i=0, ct=0; i<(LO)blockGids.size(); i++) {
957 GO base = (blockGids[i] - indexBase)* blockSize + indexBase;
958 for(LO j=0; j<blockSize; j++, ct++)
959 pointGids[i*blockSize+j] = base+j;
960 }
961
962 Teuchos::RCP<const map_type> pointMap = Teuchos::rcp( new map_type(TOT::invalid(), pointGids(), 0, blockMap.getComm()) );
963 return pointMap;
964
965 }
966
967
968 template<class Scalar, class LO, class GO, class Node>
969 Teuchos::RCP<CrsMatrix<Scalar, LO, GO, Node> >
971
972 using Teuchos::Array;
973 using Teuchos::ArrayView;
974 using Teuchos::RCP;
975
976 typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
978 typedef Tpetra::CrsMatrix<Scalar, LO,GO,Node> crs_matrix_type;
979
980 using crs_graph_type = typename block_crs_matrix_type::crs_graph_type;
981 using local_graph_device_type = typename crs_matrix_type::local_graph_device_type;
982 using local_matrix_device_type = typename crs_matrix_type::local_matrix_device_type;
983 using row_map_type = typename local_graph_device_type::row_map_type::non_const_type;
984 using entries_type = typename local_graph_device_type::entries_type::non_const_type;
985 using values_type = typename local_matrix_device_type::values_type::non_const_type;
986
987 using row_map_type_const = typename local_graph_device_type::row_map_type;
988 using entries_type_const = typename local_graph_device_type::entries_type;
989
990 using little_block_type = typename block_crs_matrix_type::const_little_block_type;
991 using offset_type = typename row_map_type::non_const_value_type;
992 using column_type = typename entries_type::non_const_value_type;
993
994 using execution_space = typename Node::execution_space;
995 using range_type = Kokkos::RangePolicy<execution_space, LO>;
996
997
998 LO blocksize = blockMatrix.getBlockSize();
999 const offset_type bs2 = blocksize * blocksize;
1000 size_t block_nnz = blockMatrix.getLocalNumEntries();
1001 size_t point_nnz = block_nnz * bs2;
1002
1003 // We can get these from the blockMatrix directly
1004 RCP<const map_type> pointDomainMap = blockMatrix.getDomainMap();
1005 RCP<const map_type> pointRangeMap = blockMatrix.getRangeMap();
1006
1007 // We need to generate the row/col Map ourselves.
1008 RCP<const map_type> blockRowMap = blockMatrix.getRowMap();
1009 RCP<const map_type> pointRowMap = createPointMap<LO,GO,Node>(blocksize, *blockRowMap);
1010
1011 RCP<const map_type> blockColMap = blockMatrix.getColMap();
1012 RCP<const map_type> pointColMap = createPointMap<LO,GO,Node>(blocksize, *blockColMap);
1013
1014 // Get the last few things
1015
1016 const crs_graph_type & blockGraph = blockMatrix.getCrsGraph();
1017 LO point_rows = (LO) pointRowMap->getLocalNumElements();
1018 LO block_rows = (LO) blockRowMap->getLocalNumElements();
1019 auto blockValues = blockMatrix.getValuesDevice();
1020 auto blockLocalGraph = blockGraph.getLocalGraphDevice();
1021 row_map_type_const blockRowptr = blockLocalGraph.row_map;
1022 entries_type_const blockColind = blockLocalGraph.entries;
1023
1024 // Generate the point matrix rowptr / colind / values
1025 row_map_type rowptr("row_map", point_rows+1);
1026 entries_type colind("entries", point_nnz);
1027 values_type values("values", point_nnz);
1028
1029 // Pre-fill the rowptr
1030 Kokkos::parallel_for("fillRowPtr",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
1031 offset_type base = blockRowptr[i];
1032 offset_type diff = blockRowptr[i+1] - base;
1033 for(LO j=0; j<blocksize; j++) {
1034 rowptr[i*blocksize +j] = base*bs2 + j*diff*blocksize;
1035 }
1036
1037 // Fill the last guy, if we're on the final entry
1038 if(i==block_rows-1) {
1039 rowptr[block_rows*blocksize] = blockRowptr[i+1] * bs2;
1040 }
1041 });
1042
1043
1044 Kokkos::parallel_for("copyEntriesAndValues",range_type(0,block_rows),KOKKOS_LAMBDA(const LO i) {
1045 const offset_type blkBeg = blockRowptr[i];
1046 const offset_type numBlocks = blockRowptr[i+1] - blkBeg;
1047
1048 // For each block in the row...
1049 for (offset_type block=0; block < numBlocks; block++) {
1050 column_type point_col_base = blockColind[blkBeg + block] * blocksize;
1051 little_block_type my_block(blockValues.data () + (blkBeg+block) * bs2, blocksize, blocksize);
1052
1053 // For each entry in the block...
1054 for(LO little_row=0; little_row<blocksize; little_row++) {
1055 offset_type point_row_offset = rowptr[i*blocksize + little_row];
1056 for(LO little_col=0; little_col<blocksize; little_col++) {
1057 colind[point_row_offset + block*blocksize + little_col] = point_col_base + little_col;
1058 values[point_row_offset + block*blocksize + little_col] = my_block(little_row,little_col);
1059 }
1060 }
1061
1062 }
1063 });
1064
1065 // Generate the matrix
1066 RCP<crs_matrix_type> pointCrsMatrix = rcp(new crs_matrix_type(pointRowMap, pointColMap, 0));
1067 pointCrsMatrix->setAllValues(rowptr,colind,values);
1068
1069 // FUTURE OPTIMIZATION: Directly compute import/export, rather than letting ESFC do it
1070 pointCrsMatrix->expertStaticFillComplete(pointDomainMap,pointRangeMap);
1071
1072 return pointCrsMatrix;
1073 }
1074
1075
1076} // namespace Tpetra
1077
1078//
1079// Explicit instantiation macro for blockCrsMatrixWriter (various
1080// overloads), writeMatrixStrip, and convertToBlockCrsMatrix.
1081//
1082// Must be expanded from within the Tpetra namespace!
1083//
1084#define TPETRA_BLOCKCRSMATRIX_HELPERS_INSTANT(S,LO,GO,NODE) \
1085 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName); \
1086 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::string const &fileName, Teuchos::ParameterList const &params); \
1087 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os); \
1088 template void blockCrsMatrixWriter(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
1089 template void writeMatrixStrip(BlockCrsMatrix<S,LO,GO,NODE> const &A, std::ostream &os, Teuchos::ParameterList const &params); \
1090 template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE> > convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize, bool use_LID); \
1091 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > fillLogicalBlocks(const CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize); \
1092 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > unfillFormerBlockCrs(const CrsMatrix<S, LO, GO, NODE>& pointMatrix); \
1093 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE> > convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE>& blockMatrix); \
1094 template Teuchos::RCP<CrsGraph<LO, GO, NODE>> getBlockCrsGraph(const Tpetra::CrsMatrix<S, LO, GO, NODE>& pointMatrix, const LO &blockSize, bool use_local_ID);
1095
1096//
1097// Explicit instantiation macro for createMeshMap / createPointMap
1098//
1099#define TPETRA_CREATEMESHMAP_INSTANT(LO,GO,NODE) \
1100 template Teuchos::RCP<const Map<LO,GO,NODE> > createMeshMap (const LO& blockSize, const Map<LO,GO,NODE>& pointMap, bool use_local_ID); \
1101 template Teuchos::RCP<const Map<LO,GO,NODE> > createPointMap (const LO& blockSize, const Map<LO,GO,NODE>& blockMap);
1102
1103#endif // TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
virtual LO getBlockSize() const override
The number of degrees of freedom per mesh point.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
size_t getLocalNumRows() const override
get the local number of block rows
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
global_size_t getGlobalNumRows() const override
get the global number of block rows
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
ValueType get(const KeyType key)
Get the value corresponding to the given key.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map's device.
One or more distributed dense vectors.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, Node > > fillLogicalBlocks(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize)
Fill all point entries in a logical block of a CrsMatrix with zeroes. This should be called before co...
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, Node > > unfillFormerBlockCrs(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix)
Unfill all point entries in a logical block of a CrsMatrix with zeroes. This can be called after conv...
void writeMatrixStrip(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::ostream &os, Teuchos::ParameterList const &params)
Helper function called by blockCrsMatrixWriter.
Teuchos::RCP< Tpetra::CrsGraph< LO, GO, Node > > getBlockCrsGraph(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize, bool use_local_ID=true)
Non-member constructor that creates the CrsGraph of a BlockCrsMatrix from an existing point CrsMatrix...
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createPointMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &blockMap)
Helper function to generate a point map from a block map (with a given block size) GIDs associated wi...
Teuchos::RCP< CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > createCrsGraph(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, size_t maxNumEntriesPerRow=0, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Nonmember function to create an empty CrsGraph given a row Map and the max number of entries allowed ...
Teuchos::RCP< CrsMatrix< Scalar, LO, GO, Node > > convertToCrsMatrix(const Tpetra::BlockCrsMatrix< Scalar, LO, GO, Node > &blockMatrix)
Non-member constructor that creates a point CrsMatrix from an existing BlockCrsMatrix.
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createMeshMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &pointMap, bool use_local_ID=false)
Helper function to generate a mesh map from a point map. Important! It's assumed that point GIDs asso...
void blockCrsMatrixWriter(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::string const &fileName)
Helper function to write a BlockCrsMatrix. Calls the 3-argument version.
@ INSERT
Insert new values that don't currently exist.
Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > convertToBlockCrsMatrix(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize, bool use_local_ID=true)
Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.