MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
11#define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
12
13#include <Kokkos_Core.hpp>
14#include <KokkosSparse_CrsMatrix.hpp>
15#include <sstream>
16#include <tuple>
17
18#include "Xpetra_Matrix.hpp"
19
21
22#include "MueLu_AmalgamationInfo.hpp"
23#include "MueLu_Exceptions.hpp"
24#include "MueLu_Level.hpp"
25#include "MueLu_LWGraph_kokkos.hpp"
26#include "MueLu_MasterList.hpp"
27#include "MueLu_Monitor.hpp"
28
29// #define MUELU_COALESCE_DROP_DEBUG 1
30
33#include "MueLu_CutDrop.hpp"
37
38namespace MueLu {
39
40template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
42 RCP<ParameterList> validParamList = rcp(new ParameterList());
43
44#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
45 SET_VALID_ENTRY("aggregation: drop tol");
46 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
47 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
48 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
49 SET_VALID_ENTRY("aggregation: row sum drop tol");
50 SET_VALID_ENTRY("aggregation: drop scheme");
51 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
52 SET_VALID_ENTRY("aggregation: distance laplacian metric");
53 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
54 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
55 SET_VALID_ENTRY("aggregation: distance laplacian algo");
56 SET_VALID_ENTRY("aggregation: classical algo");
57 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
58
59 SET_VALID_ENTRY("filtered matrix: use lumping");
60 SET_VALID_ENTRY("filtered matrix: reuse graph");
61 SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
62
63 SET_VALID_ENTRY("filtered matrix: use root stencil");
64 SET_VALID_ENTRY("filtered matrix: use spread lumping");
65 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
66 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
67 SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
68
69#undef SET_VALID_ENTRY
70 validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
71
72 // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
73 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
74 validParamList->getEntry("aggregation: classical algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
75 validParamList->getEntry("aggregation: distance laplacian algo").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "unscaled cut", "scaled cut", "scaled cut symmetric"))));
76 validParamList->getEntry("aggregation: distance laplacian metric").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("unweighted", "material"))));
77
78 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
79 validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
80 validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
81 validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNumber");
82 validParamList->set<RCP<const FactoryBase>>("Material", Teuchos::null, "Generating factory for Material");
83
84 return validParamList;
85}
86
87template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
89 Input(currentLevel, "A");
90 Input(currentLevel, "UnAmalgamationInfo");
91
92 const ParameterList& pL = GetParameterList();
93 std::string algo = pL.get<std::string>("aggregation: drop scheme");
94 std::string distLaplMetric = pL.get<std::string>("aggregation: distance laplacian metric");
95 if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
96 Input(currentLevel, "Coordinates");
97 if (distLaplMetric == "material")
98 Input(currentLevel, "Material");
99 }
100 if (algo == "signed classical sa")
101 ;
102 else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
103 Input(currentLevel, "BlockNumber");
104 }
105}
106
107template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
109 Build(Level& currentLevel) const {
110 auto A = Get<RCP<Matrix>>(currentLevel, "A");
111 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
112 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
113
114 std::tuple<GlobalOrdinal, boundary_nodes_type> results;
115 if (blkSize == 1)
116 results = BuildScalar(currentLevel);
117 else
118 results = BuildVector(currentLevel);
119
120 if (GetVerbLevel() & Statistics1) {
121 GlobalOrdinal numDropped = std::get<0>(results);
122 auto boundaryNodes = std::get<1>(results);
123
124 GO numLocalBoundaryNodes = 0;
125 GO numGlobalBoundaryNodes = 0;
126
127 Kokkos::parallel_reduce(
128 "MueLu:CoalesceDropF:Build:bnd", range_type(0, boundaryNodes.extent(0)),
129 KOKKOS_LAMBDA(const LO i, GO& n) {
130 if (boundaryNodes(i))
131 n++;
132 },
133 numLocalBoundaryNodes);
134
135 auto comm = A->getRowMap()->getComm();
136 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
137
138 GO numGlobalTotal = A->getGlobalNumEntries();
139 GO numGlobalDropped;
140 MueLu_sumAll(comm, numDropped, numGlobalDropped);
141
142 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
143 if (numGlobalTotal != 0) {
144 GetOStream(Statistics1) << "Number of dropped entries: "
145 << numGlobalDropped << "/" << numGlobalTotal
146 << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
147 }
148 }
149}
150
151template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
152std::tuple<Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>, Teuchos::RCP<Xpetra::Vector<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>>> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
153 GetBlockNumberMVs(Level& currentLevel) const {
154 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
155 RCP<LocalOrdinalVector> ghostedBlockNumber;
156 GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
157
158 // Ghost the column block numbers if we need to
159 auto A = Get<RCP<Matrix>>(currentLevel, "A");
160 RCP<const Import> importer = A->getCrsGraph()->getImporter();
161 if (!importer.is_null()) {
162 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
163 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
164 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
165 } else {
166 ghostedBlockNumber = BlockNumber;
167 }
168 return std::make_tuple(BlockNumber, ghostedBlockNumber);
169}
170
171template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
172std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
173 BuildScalar(Level& currentLevel) const {
174 FactoryMonitor m(*this, "Build", currentLevel);
175
176 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
177 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
178 using local_matrix_type = typename MatrixType::local_matrix_type;
179 using local_graph_type = typename GraphType::local_graph_type;
180 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
181 using entries_type = typename local_graph_type::entries_type::non_const_type;
182 using values_type = typename local_matrix_type::values_type::non_const_type;
183 using device_type = typename Node::device_type;
184 using memory_space = typename device_type::memory_space;
185
186 typedef Teuchos::ScalarTraits<SC> STS;
187 typedef typename STS::magnitudeType MT;
188 const MT zero = Teuchos::ScalarTraits<MT>::zero();
189
190 auto A = Get<RCP<Matrix>>(currentLevel, "A");
191
193 // Process parameterlist
194 const ParameterList& pL = GetParameterList();
195
196 // Boundary detection
197 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
198 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
199 const LocalOrdinal dirichletNonzeroThreshold = 1;
200
201 // Dropping
202 const std::string algo = pL.get<std::string>("aggregation: drop scheme");
203 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
204 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
205 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
206 MT threshold;
207 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
208 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
209 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
210 else
211 threshold = as<MT>(pL.get<double>("aggregation: drop tol"));
212 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
213
214 // Fill
215 const bool lumping = pL.get<bool>("filtered matrix: use lumping");
216 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
217 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
218
219 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
220 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
221
222 const MT filteringDirichletThreshold = as<MT>(pL.get<double>("filtered matrix: Dirichlet threshold"));
223 TEUCHOS_ASSERT(!useRootStencil);
224 TEUCHOS_ASSERT(!useSpreadLumping);
225
226 if (algo == "classical")
227 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
228 else if (algo == "distance laplacian")
229 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\" distance laplacian metric = \"" << distanceLaplacianMetric << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
230 else
231 GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
232
233 if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos)))
234 TEUCHOS_TEST_FOR_EXCEPTION(threshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
235
237 // We perform four sweeps over the rows of A:
238 // Pass 1: detection of boundary nodes
239 // Pass 2: diagonal extraction
240 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
241 // Pass 4: fill of the filtered matrix
242 //
243 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
244
245 // TODO: We could merge pass 1 and 2.
246
247 auto crsA = toCrsMatrix(A);
248 auto lclA = crsA->getLocalMatrixDevice();
249 auto range = range_type(0, lclA.numRows());
250
252 // Pass 1: Detect boundary nodes
253 //
254 // The following criteria are available:
255 // - BoundaryDetection::PointDirichletFunctor
256 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
257 // - BoundaryDetection::RowSumFunctor
258 // Marks rows as Dirichlet bases on row-sum criterion
259
260 // Dirichlet nodes
261 auto boundaryNodes = boundary_nodes_type("boundaryNodes", lclA.numRows()); // initialized to false
262 {
263 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
264
265 // macro that applies boundary detection functors
266#define MueLu_runBoundaryFunctors(...) \
267 { \
268 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
269 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
270 }
271
272 auto dirichlet_detection = BoundaryDetection::PointDirichletFunctor(lclA, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
273
274 if (rowSumTol <= 0.) {
275 MueLu_runBoundaryFunctors(dirichlet_detection);
276 } else {
277 auto apply_rowsum = BoundaryDetection::RowSumFunctor(lclA, boundaryNodes, rowSumTol);
278 MueLu_runBoundaryFunctors(dirichlet_detection,
279 apply_rowsum);
280 }
281#undef MueLu_runBoundaryFunctors
282 }
283 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
284 // Otherwise we're now done with it now.
285
287 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
288 // rowptr of filtered matrix
289 //
290 // The following criteria are available:
291 // - Misc::PointwiseDropBoundaryFunctor
292 // Drop all rows that have been marked as Dirichlet
293 // - Misc::DropOffRankFunctor
294 // Drop all entries that are off-rank
295 // - ClassicalDropping::SAFunctor
296 // Classical dropping
297 // - ClassicalDropping::SignedRSFunctor
298 // Classical RS dropping
299 // - ClassicalDropping::SignedSAFunctor
300 // Classical signed SA dropping
301 // - DistanceLaplacian::DropFunctor
302 // Distance Laplacian dropping
303 // - Misc::KeepDiagonalFunctor
304 // Mark diagonal as KEEP
305 // - Misc::MarkSingletonFunctor
306 // Mark singletons after dropping as Dirichlet
307 // - Misc::BlockDiagonalizeFunctor
308 // Drop coupling between blocks
309 //
310 // For the block diagonal variants we first block diagonalized and then apply "blocksize = 1" algorithms.
311
312 // rowptr of filtered A
313 auto filtered_rowptr = rowptr_type("filtered_rowptr", lclA.numRows() + 1);
314 // Number of nonzeros of filtered A
315 LocalOrdinal nnz_filtered = 0;
316 // dropping decisions for each entry
317 auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
318 {
319 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
320
321 std::string functorLabel = "MueLu::CoalesceDrop::CountEntries";
322
323 // macro that applied dropping functors
324#if !defined(HAVE_MUELU_DEBUG)
325#define MueLu_runDroppingFunctors(...) \
326 { \
327 auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__); \
328 Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \
329 }
330#else
331#define MueLu_runDroppingFunctors(...) \
332 { \
333 auto debug = Misc::DebugFunctor(lclA, results); \
334 auto countingFunctor = MatrixConstruction::PointwiseCountingFunctor(lclA, results, filtered_rowptr, __VA_ARGS__, debug); \
335 Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz_filtered); \
336 }
337#endif
338
339 auto drop_boundaries = Misc::PointwiseDropBoundaryFunctor(lclA, boundaryNodes, results);
340
341 if (threshold != zero) {
342 auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
343 auto mark_singletons_as_boundary = Misc::MarkSingletonFunctor(lclA, boundaryNodes, results);
344
345 if (algo == "classical" || algo == "block diagonal classical") {
346 if (algo == "block diagonal classical") {
347 auto BlockNumbers = GetBlockNumberMVs(currentLevel);
348 auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
349
350 if (classicalAlgoStr == "default") {
351 auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
352
353 if (aggregationMayCreateDirichlet) {
354 MueLu_runDroppingFunctors(block_diagonalize,
355 classical_dropping,
356 drop_boundaries,
357 preserve_diagonals,
358 mark_singletons_as_boundary);
359 } else {
360 MueLu_runDroppingFunctors(block_diagonalize,
361 classical_dropping,
362 drop_boundaries,
363 preserve_diagonals);
364 }
365 } else if (classicalAlgoStr == "unscaled cut") {
366 auto comparison = CutDrop::UnscaledComparison(*A, results);
367 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
368
369 MueLu_runDroppingFunctors(block_diagonalize,
370 drop_boundaries,
371 preserve_diagonals,
372 cut_drop);
373 } else if (classicalAlgoStr == "scaled cut") {
374 auto comparison = CutDrop::ScaledComparison(*A, results);
375 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
376
377 MueLu_runDroppingFunctors(block_diagonalize,
378 drop_boundaries,
379 preserve_diagonals,
380 cut_drop);
381 } else if (classicalAlgoStr == "scaled cut symmetric") {
382 auto comparison = CutDrop::ScaledComparison(*A, results);
383 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
384
385 MueLu_runDroppingFunctors(block_diagonalize,
386 drop_boundaries,
387 preserve_diagonals,
388 cut_drop);
389
390 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
391
392 MueLu_runDroppingFunctors(symmetrize);
393
394 } else {
395 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
396 }
397 } else {
398 if (classicalAlgoStr == "default") {
399 auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
400
401 if (aggregationMayCreateDirichlet) {
402 MueLu_runDroppingFunctors(classical_dropping,
403 drop_boundaries,
404 preserve_diagonals,
405 mark_singletons_as_boundary);
406 } else {
407 MueLu_runDroppingFunctors(classical_dropping,
408 drop_boundaries,
409 preserve_diagonals);
410 }
411 } else if (classicalAlgoStr == "unscaled cut") {
412 auto comparison = CutDrop::UnscaledComparison(*A, results);
413 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
414
415 MueLu_runDroppingFunctors(drop_boundaries,
416 preserve_diagonals,
417 cut_drop);
418 } else if (classicalAlgoStr == "scaled cut") {
419 auto comparison = CutDrop::ScaledComparison(*A, results);
420 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
421
422 MueLu_runDroppingFunctors(drop_boundaries,
423 preserve_diagonals,
424 cut_drop);
425 } else if (classicalAlgoStr == "scaled cut symmetric") {
426 auto comparison = CutDrop::ScaledComparison(*A, results);
427 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
428
429 MueLu_runDroppingFunctors(drop_boundaries,
430 preserve_diagonals,
431 cut_drop);
432
433 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
434
435 MueLu_runDroppingFunctors(symmetrize);
436
437 } else {
438 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
439 }
440 }
441 } else if (algo == "signed classical" || algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") {
442 auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results);
443
444 if (algo == "block diagonal signed classical" || algo == "block diagonal colored signed classical") {
445 auto BlockNumbers = GetBlockNumberMVs(currentLevel);
446 auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
447
448 if (classicalAlgoStr == "default") {
449 if (aggregationMayCreateDirichlet) {
450 MueLu_runDroppingFunctors(block_diagonalize,
451 signed_classical_rs_dropping,
452 drop_boundaries,
453 preserve_diagonals,
454 mark_singletons_as_boundary);
455
456 } else {
457 MueLu_runDroppingFunctors(block_diagonalize,
458 signed_classical_rs_dropping,
459 drop_boundaries,
460 preserve_diagonals);
461 }
462 } else {
463 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
464 }
465 } else {
466 if (classicalAlgoStr == "default") {
467 if (aggregationMayCreateDirichlet) {
468 MueLu_runDroppingFunctors(signed_classical_rs_dropping,
469 drop_boundaries,
470 preserve_diagonals,
471 mark_singletons_as_boundary);
472
473 } else {
474 MueLu_runDroppingFunctors(signed_classical_rs_dropping,
475 drop_boundaries,
476 preserve_diagonals);
477 }
478 } else {
479 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
480 }
481 }
482 } else if (algo == "signed classical sa") {
483 if (classicalAlgoStr == "default") {
484 auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results);
485
486 if (aggregationMayCreateDirichlet) {
487 MueLu_runDroppingFunctors(signed_classical_sa_dropping,
488 drop_boundaries,
489 preserve_diagonals,
490 mark_singletons_as_boundary);
491
492 } else {
493 MueLu_runDroppingFunctors(signed_classical_sa_dropping,
494 drop_boundaries,
495 preserve_diagonals);
496 }
497 } else {
498 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be default, not \"" << classicalAlgoStr << "\"");
499 }
500 } else if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
501 using doubleMultiVector = Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO>;
502 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
503
504 if (algo == "block diagonal distance laplacian") {
505 auto BlockNumbers = GetBlockNumberMVs(currentLevel);
506 auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
507
508 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
509
510 if (distanceLaplacianAlgoStr == "default") {
511 auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
512
513 if (aggregationMayCreateDirichlet) {
514 MueLu_runDroppingFunctors(block_diagonalize,
515 dist_laplacian_dropping,
516 drop_boundaries,
517 preserve_diagonals,
518 mark_singletons_as_boundary);
519 } else {
520 MueLu_runDroppingFunctors(block_diagonalize,
521 dist_laplacian_dropping,
522 drop_boundaries,
523 preserve_diagonals);
524 }
525 } else if (distanceLaplacianAlgoStr == "unscaled cut") {
526 auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
527 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
528
529 MueLu_runDroppingFunctors(block_diagonalize,
530 drop_boundaries,
531 preserve_diagonals,
532 cut_drop);
533 } else if (distanceLaplacianAlgoStr == "scaled cut") {
534 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
535 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
536
537 MueLu_runDroppingFunctors(block_diagonalize,
538 drop_boundaries,
539 preserve_diagonals,
540 cut_drop);
541 } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
542 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
543 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
544
545 MueLu_runDroppingFunctors(block_diagonalize,
546 drop_boundaries,
547 cut_drop,
548 preserve_diagonals);
549
550 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
551
552 MueLu_runDroppingFunctors(symmetrize);
553 } else {
554 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
555 }
556 } else {
557 if (distanceLaplacianAlgoStr == "default") {
558 if (distanceLaplacianMetric == "unweighted") {
559 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
560 auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
561
562 if (aggregationMayCreateDirichlet) {
563 MueLu_runDroppingFunctors(dist_laplacian_dropping,
564 drop_boundaries,
565 preserve_diagonals,
566 mark_singletons_as_boundary);
567 } else {
568 MueLu_runDroppingFunctors(dist_laplacian_dropping,
569 drop_boundaries,
570 preserve_diagonals);
571 }
572 } else if (distanceLaplacianMetric == "material") {
573 auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
574 if (material->getNumVectors() == 1) {
575 GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
576
577 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
578 auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
579
580 if (aggregationMayCreateDirichlet) {
581 MueLu_runDroppingFunctors(dist_laplacian_dropping,
582 drop_boundaries,
583 preserve_diagonals,
584 mark_singletons_as_boundary);
585 } else {
586 MueLu_runDroppingFunctors(dist_laplacian_dropping,
587 drop_boundaries,
588 preserve_diagonals);
589 }
590 } else {
591 TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
592
593 {
594 std::stringstream ss;
595 ss << "material tensor mean =" << std::endl;
596 size_t k = 0;
597 for (size_t i = 0; i < coords->getNumVectors(); ++i) {
598 ss << " ";
599 for (size_t j = 0; j < coords->getNumVectors(); ++j) {
600 ss << material->getVector(k)->meanValue() << " ";
601 ++k;
602 }
603 ss << std::endl;
604 }
605 GetOStream(Runtime0) << ss.str();
606 }
607
608 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
609 auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
610
611 if (aggregationMayCreateDirichlet) {
612 MueLu_runDroppingFunctors(dist_laplacian_dropping,
613 drop_boundaries,
614 preserve_diagonals,
615 mark_singletons_as_boundary);
616 } else {
617 MueLu_runDroppingFunctors(dist_laplacian_dropping,
618 drop_boundaries,
619 preserve_diagonals);
620 }
621 }
622 }
623
624 } else if (distanceLaplacianAlgoStr == "unscaled cut") {
625 if (distanceLaplacianMetric == "unweighted") {
626 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
627 auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
628 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
629
630 MueLu_runDroppingFunctors(drop_boundaries,
631 preserve_diagonals,
632 cut_drop);
633 } else if (distanceLaplacianMetric == "material") {
634 auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
635 if (material->getNumVectors() == 1) {
636 GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
637
638 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
639 auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
640 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
641
642 MueLu_runDroppingFunctors(drop_boundaries,
643 preserve_diagonals,
644 cut_drop);
645 } else {
646 TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
647
648 {
649 std::stringstream ss;
650 ss << "material tensor mean =" << std::endl;
651 size_t k = 0;
652 for (size_t i = 0; i < coords->getNumVectors(); ++i) {
653 ss << " ";
654 for (size_t j = 0; j < coords->getNumVectors(); ++j) {
655 ss << material->getVector(k)->meanValue() << " ";
656 ++k;
657 }
658 ss << std::endl;
659 }
660 GetOStream(Runtime0) << ss.str();
661 }
662
663 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
664 auto comparison = CutDrop::UnscaledDistanceLaplacianComparison(*A, dist2, results);
665 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
666
667 MueLu_runDroppingFunctors(drop_boundaries,
668 preserve_diagonals,
669 cut_drop);
670 }
671 }
672 } else if (distanceLaplacianAlgoStr == "scaled cut") {
673 if (distanceLaplacianMetric == "unweighted") {
674 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
675 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
676 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
677
678 MueLu_runDroppingFunctors(drop_boundaries,
679 preserve_diagonals,
680 cut_drop);
681 } else if (distanceLaplacianMetric == "material") {
682 auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
683 if (material->getNumVectors() == 1) {
684 GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
685
686 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
687 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
688 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
689
690 MueLu_runDroppingFunctors(drop_boundaries,
691 preserve_diagonals,
692 cut_drop);
693 } else {
694 TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
695
696 {
697 std::stringstream ss;
698 ss << "material tensor mean =" << std::endl;
699 size_t k = 0;
700 for (size_t i = 0; i < coords->getNumVectors(); ++i) {
701 ss << " ";
702 for (size_t j = 0; j < coords->getNumVectors(); ++j) {
703 ss << material->getVector(k)->meanValue() << " ";
704 ++k;
705 }
706 ss << std::endl;
707 }
708 GetOStream(Runtime0) << ss.str();
709 }
710
711 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
712 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
713 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
714
715 MueLu_runDroppingFunctors(drop_boundaries,
716 preserve_diagonals,
717 cut_drop);
718 }
719 }
720 } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
721 if (distanceLaplacianMetric == "unweighted") {
722 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
723 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
724 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
725
726 MueLu_runDroppingFunctors(drop_boundaries,
727 preserve_diagonals,
728 cut_drop);
729 } else if (distanceLaplacianMetric == "material") {
730 auto material = Get<RCP<MultiVector>>(currentLevel, "Material");
731 if (material->getNumVectors() == 1) {
732 GetOStream(Runtime0) << "material scalar mean = " << material->getVector(0)->meanValue() << std::endl;
733
734 auto dist2 = DistanceLaplacian::ScalarMaterialDistanceFunctor(*A, coords, material);
735 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
736 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
737
738 MueLu_runDroppingFunctors(drop_boundaries,
739 preserve_diagonals,
740 cut_drop);
741 } else {
742 TEUCHOS_TEST_FOR_EXCEPTION(coords->getNumVectors() * coords->getNumVectors() != material->getNumVectors(), Exceptions::RuntimeError, "Need \"Material\" to have spatialDim^2 vectors.");
743
744 {
745 std::stringstream ss;
746 ss << "material tensor mean =" << std::endl;
747 size_t k = 0;
748 for (size_t i = 0; i < coords->getNumVectors(); ++i) {
749 ss << " ";
750 for (size_t j = 0; j < coords->getNumVectors(); ++j) {
751 ss << material->getVector(k)->meanValue() << " ";
752 ++k;
753 }
754 ss << std::endl;
755 }
756 GetOStream(Runtime0) << ss.str();
757 }
758
759 auto dist2 = DistanceLaplacian::TensorMaterialDistanceFunctor(*A, coords, material);
760 auto comparison = CutDrop::ScaledDistanceLaplacianComparison(*A, dist2, results);
761 auto cut_drop = CutDrop::CutDropFunctor(comparison, threshold);
762
763 MueLu_runDroppingFunctors(drop_boundaries,
764 preserve_diagonals,
765 cut_drop);
766 }
767 }
768
769 auto symmetrize = Misc::SymmetrizeFunctor(lclA, results);
770
771 MueLu_runDroppingFunctors(symmetrize);
772 } else {
773 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
774 }
775 }
776 } else if (algo == "block diagonal") {
777 auto BlockNumbers = GetBlockNumberMVs(currentLevel);
778 auto block_diagonalize = Misc::BlockDiagonalizeFunctor(*A, *std::get<0>(BlockNumbers), *std::get<1>(BlockNumbers), results);
779
780 MueLu_runDroppingFunctors(block_diagonalize);
781 } else {
782 TEUCHOS_ASSERT(false);
783 }
784 } else {
785 Kokkos::deep_copy(results, KEEP);
786 // FIXME: This seems inconsistent
787 // MueLu_runDroppingFunctors(drop_boundaries);
788 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
790 }
791#undef MueLu_runDroppingFunctors
792 }
793 GO numDropped = lclA.nnz() - nnz_filtered;
794 // We now know the number of entries of filtered A and have the final rowptr.
795
797 // Pass 4: Create local matrix for filtered A
798 //
799 // Dropped entries are optionally lumped to the diagonal.
800
801 RCP<Matrix> filteredA;
802 RCP<LWGraph_kokkos> graph;
803 {
804 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
805
806 local_matrix_type lclFilteredA;
807 local_graph_type lclGraph;
808 if (reuseGraph) {
809 filteredA = MatrixFactory::BuildCopy(A);
810 lclFilteredA = filteredA->getLocalMatrixDevice();
811
812 auto colidx = entries_type("entries", nnz_filtered);
813 lclGraph = local_graph_type(colidx, filtered_rowptr);
814 } else {
815 auto colidx = entries_type("entries", nnz_filtered);
816 auto values = values_type("values", nnz_filtered);
817 lclFilteredA = local_matrix_type("filteredA",
818 lclA.numRows(), lclA.numCols(),
819 nnz_filtered,
820 values, filtered_rowptr, colidx);
821 }
822
823 if (lumping) {
824 if (reuseGraph) {
825 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, true>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
826 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
827 } else {
828 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, true>(lclA, results, lclFilteredA, filteringDirichletThreshold);
829 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
830 }
831 } else {
832 if (reuseGraph) {
833 auto fillFunctor = MatrixConstruction::PointwiseFillReuseFunctor<local_matrix_type, local_graph_type, false>(lclA, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
834 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
835 } else {
836 auto fillFunctor = MatrixConstruction::PointwiseFillNoReuseFunctor<local_matrix_type, false>(lclA, results, lclFilteredA, filteringDirichletThreshold);
837 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
838 }
839 }
840
841 if (!reuseGraph)
842 filteredA = MatrixFactory::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
843 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
844
845 if (reuseEigenvalue) {
846 // Reuse max eigenvalue from A
847 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
848 // the D^{-1}A estimate in A, may as well use it.
849 // NOTE: ML does that too
850 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
851 } else {
852 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
853 }
854
855 if (!reuseGraph) {
856 // Use graph of filteredA as graph.
857 lclGraph = filteredA->getCrsGraph()->getLocalGraphDevice();
858 }
859 graph = rcp(new LWGraph_kokkos(lclGraph, filteredA->getRowMap(), filteredA->getColMap(), "amalgamated graph of A"));
860 graph->SetBoundaryNodeMap(boundaryNodes);
861 }
862
863 LO dofsPerNode = 1;
864 Set(currentLevel, "DofsPerNode", dofsPerNode);
865 Set(currentLevel, "Graph", graph);
866 Set(currentLevel, "A", filteredA);
867
868 return std::make_tuple(numDropped, boundaryNodes);
869}
870
871template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
872std::tuple<GlobalOrdinal, typename MueLu::LWGraph_kokkos<LocalOrdinal, GlobalOrdinal, Node>::boundary_nodes_type> CoalesceDropFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
873 BuildVector(Level& currentLevel) const {
874 FactoryMonitor m(*this, "Build", currentLevel);
875
876 using MatrixType = Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
877 using GraphType = Xpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
878 using local_matrix_type = typename MatrixType::local_matrix_type;
879 using local_graph_type = typename GraphType::local_graph_type;
880 using rowptr_type = typename local_graph_type::row_map_type::non_const_type;
881 using entries_type = typename local_graph_type::entries_type::non_const_type;
882 using values_type = typename local_matrix_type::values_type::non_const_type;
883 using device_type = typename Node::device_type;
884 using memory_space = typename device_type::memory_space;
885
886 typedef Teuchos::ScalarTraits<SC> STS;
887 typedef typename STS::magnitudeType MT;
888 const MT zero = Teuchos::ScalarTraits<MT>::zero();
889
890 auto A = Get<RCP<Matrix>>(currentLevel, "A");
891
892 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
893 blkSize is the number of storage blocks that must kept together during the amalgamation process.
894
895 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
896
897 numPDEs = blkSize * storageblocksize.
898
899 If numPDEs==1
900 Matrix is point storage (classical CRS storage). storageblocksize=1 and blkSize=1
901 No other values makes sense.
902
903 If numPDEs>1
904 If matrix uses point storage, then storageblocksize=1 and blkSize=numPDEs.
905 If matrix uses block storage, with block size of n, then storageblocksize=n, and blkSize=numPDEs/n.
906 Thus far, only storageblocksize=numPDEs and blkSize=1 has been tested.
907 */
908
909 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
910 LO blkSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
911
912 auto amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
913
914 const RCP<const Map> rowMap = A->getRowMap();
915 const RCP<const Map> colMap = A->getColMap();
916
917 // build a node row map (uniqueMap = non-overlapping) and a node column map
918 // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
919 // stored in the AmalgamationInfo class container contain the local node id
920 // given a local dof id. The data is calculated in the AmalgamationFactory and
921 // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
922 const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
923 const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
924 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
925 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
926
927 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
928 rowTranslationView(rowTranslationArray.getRawPtr(), rowTranslationArray.size());
929 Kokkos::View<LO*, Kokkos::MemoryUnmanaged>
930 colTranslationView(colTranslationArray.getRawPtr(), colTranslationArray.size());
931
932 // get number of local nodes
933 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
934 typedef typename Kokkos::View<LocalOrdinal*, typename Node::device_type> id_translation_type;
935 id_translation_type rowTranslation("dofId2nodeId", rowTranslationArray.size());
936 id_translation_type colTranslation("ov_dofId2nodeId", colTranslationArray.size());
937 Kokkos::deep_copy(rowTranslation, rowTranslationView);
938 Kokkos::deep_copy(colTranslation, colTranslationView);
939
940 // extract striding information
941 blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
942 LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
943 LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
944 if (A->IsView("stridedMaps") == true) {
945 const RCP<const Map> myMap = A->getRowMap("stridedMaps");
946 const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
947 TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
948 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
949 blkId = strMap->getStridedBlockId();
950 if (blkId > -1)
951 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
952 }
953
954 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() % blkPartSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getLocalNumElements() << " but should be a multiple of " << blkPartSize);
955
957 // Process parameterlist
958 const ParameterList& pL = GetParameterList();
959
960 // Boundary detection
961 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
962 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
963 const LocalOrdinal dirichletNonzeroThreshold = 1;
964 const bool useGreedyDirichlet = pL.get<bool>("aggregation: greedy Dirichlet");
965 TEUCHOS_TEST_FOR_EXCEPTION(rowSumTol > zero, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: RowSum is not implemented for vectorial problems.");
966
967 // Dropping
968 const std::string algo = pL.get<std::string>("aggregation: drop scheme");
969 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
970 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
971 std::string distanceLaplacianMetric = pL.get<std::string>("aggregation: distance laplacian metric");
972 MT threshold;
973 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
974 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
975 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
976 else
977 threshold = as<MT>(pL.get<double>("aggregation: drop tol"));
978 bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
979
980 // Fill
981 const bool lumping = pL.get<bool>("filtered matrix: use lumping");
982 const bool reuseGraph = pL.get<bool>("filtered matrix: reuse graph");
983 const bool reuseEigenvalue = pL.get<bool>("filtered matrix: reuse eigenvalue");
984
985 const bool useRootStencil = pL.get<bool>("filtered matrix: use root stencil");
986 const bool useSpreadLumping = pL.get<bool>("filtered matrix: use spread lumping");
987
988 const MT filteringDirichletThreshold = as<MT>(pL.get<double>("filtered matrix: Dirichlet threshold"));
989
990 TEUCHOS_ASSERT(!useRootStencil);
991 TEUCHOS_ASSERT(!useSpreadLumping);
992
993 if (algo == "classical") {
994 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
995 } else if (algo == "distance laplacian") {
996 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\" distance laplacian metric = \"" << distanceLaplacianMetric << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
997 } else
998 GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
999
1001 // We perform four sweeps over the rows of A:
1002 // Pass 1: detection of boundary nodes
1003 // Pass 2: diagonal extraction
1004 // Pass 3: drop decision for each entry and construction of the rowptr of the filtered matrix
1005 // Pass 4: fill of the filtered matrix
1006 //
1007 // Pass 1 and 3 apply a sequence of criteria to each row of the matrix.
1008
1009 // TODO: We could merge pass 1 and 2.
1010
1011 auto crsA = toCrsMatrix(A);
1012 auto lclA = crsA->getLocalMatrixDevice();
1013 auto range = range_type(0, numNodes);
1014
1016 // Pass 1: Detect boundary nodes
1017 //
1018 // The following criteria are available:
1019 // - BoundaryDetection::VectorDirichletFunctor
1020 // Marks rows as Dirichlet based on value threshold and number of off-diagonal entries
1021
1022 // Dirichlet nodes
1023 auto boundaryNodes = boundary_nodes_type("boundaryNodes", numNodes); // initialized to false
1024 {
1025 SubFactoryMonitor mBoundary(*this, "Boundary detection", currentLevel);
1026
1027#define MueLu_runBoundaryFunctors(...) \
1028 { \
1029 auto boundaries = BoundaryDetection::BoundaryFunctor(lclA, __VA_ARGS__); \
1030 Kokkos::parallel_for("CoalesceDrop::BoundaryDetection", range, boundaries); \
1031 }
1032
1033 if (useGreedyDirichlet) {
1034 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, true>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
1035 MueLu_runBoundaryFunctors(dirichlet_detection);
1036 } else {
1037 auto dirichlet_detection = BoundaryDetection::VectorDirichletFunctor<local_matrix_type, false>(lclA, blkPartSize, boundaryNodes, dirichletThreshold, dirichletNonzeroThreshold);
1038 MueLu_runBoundaryFunctors(dirichlet_detection);
1039 }
1040#undef MueLu_runBoundaryFunctors
1041 }
1042 // In what follows, boundaryNodes can still still get modified if aggregationMayCreateDirichlet == true.
1043 // Otherwise we're now done with it now.
1044
1046 // Pass 2 & 3: Diagonal extraction and determine dropping and construct
1047 // rowptr of filtered matrix
1048 //
1049 // The following criteria are available:
1050 // - Misc::VectorDropBoundaryFunctor
1051 // Drop all rows that have been marked as Dirichlet
1052 // - Misc::DropOffRankFunctor
1053 // Drop all entries that are off-rank
1054 // - ClassicalDropping::SAFunctor
1055 // Classical dropping
1056 // - ClassicalDropping::SignedRSFunctor
1057 // Classical RS dropping
1058 // - ClassicalDropping::SignedSAFunctor
1059 // Classical signed SA dropping
1060 // - DistanceLaplacian::DropFunctor
1061 // Distance Laplacian dropping
1062 // - Misc::KeepDiagonalFunctor
1063 // Mark diagonal as KEEP
1064 // - Misc::MarkSingletonFunctor
1065 // Mark singletons after dropping as Dirichlet
1066
1067 // rowptr of filtered A
1068 auto filtered_rowptr = rowptr_type("rowptr", lclA.numRows() + 1);
1069 auto graph_rowptr = rowptr_type("rowptr", numNodes + 1);
1070 // Number of nonzeros of filtered A and graph
1071 Kokkos::pair<LocalOrdinal, LocalOrdinal> nnz = {0, 0};
1072
1073 // dropping decisions for each entry
1074 auto results = Kokkos::View<DecisionType*, memory_space>("results", lclA.nnz()); // initialized to UNDECIDED
1075 {
1076 SubFactoryMonitor mDropping(*this, "Dropping decisions", currentLevel);
1077
1078 std::string functorLabel = "MueLu::CoalesceDrop::CountEntries";
1079
1080#if !defined(HAVE_MUELU_DEBUG)
1081#define MueLu_runDroppingFunctors(...) \
1082 { \
1083 auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__); \
1084 Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \
1085 }
1086#else
1087#define MueLu_runDroppingFunctors(...) \
1088 { \
1089 auto debug = Misc::DebugFunctor(lclA, results); \
1090 auto countingFunctor = MatrixConstruction::VectorCountingFunctor(lclA, blkPartSize, colTranslation, results, filtered_rowptr, graph_rowptr, __VA_ARGS__, debug); \
1091 Kokkos::parallel_scan(functorLabel, range, countingFunctor, nnz); \
1092 }
1093#endif
1094
1095 auto drop_boundaries = Misc::VectorDropBoundaryFunctor(lclA, rowTranslation, boundaryNodes, results);
1096
1097 if (threshold != zero) {
1098 auto preserve_diagonals = Misc::KeepDiagonalFunctor(lclA, results);
1099 auto mark_singletons_as_boundary = Misc::MarkSingletonVectorFunctor(lclA, rowTranslation, boundaryNodes, results);
1100
1101 if (algo == "classical") {
1102 if (classicalAlgoStr == "default") {
1103 auto classical_dropping = ClassicalDropping::SAFunctor(*A, threshold, results);
1104
1105 if (aggregationMayCreateDirichlet) {
1106 MueLu_runDroppingFunctors(classical_dropping,
1107 // drop_boundaries,
1108 preserve_diagonals,
1109 mark_singletons_as_boundary);
1110 } else {
1111 MueLu_runDroppingFunctors(classical_dropping,
1112 // drop_boundaries,
1113 preserve_diagonals);
1114 }
1115 } else if (classicalAlgoStr == "unscaled cut") {
1116 TEUCHOS_ASSERT(false);
1117 } else if (classicalAlgoStr == "scaled cut") {
1118 TEUCHOS_ASSERT(false);
1119 } else if (classicalAlgoStr == "scaled cut symmetric") {
1120 TEUCHOS_ASSERT(false);
1121 } else {
1122 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << classicalAlgoStr << "\"");
1123 }
1124 } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
1125 auto signed_classical_rs_dropping = ClassicalDropping::SignedRSFunctor(*A, threshold, results);
1126
1127 if (aggregationMayCreateDirichlet) {
1128 MueLu_runDroppingFunctors(signed_classical_rs_dropping,
1129 // drop_boundaries,
1130 preserve_diagonals,
1131 mark_singletons_as_boundary);
1132
1133 } else {
1134 MueLu_runDroppingFunctors(signed_classical_rs_dropping,
1135 // drop_boundaries,
1136 preserve_diagonals);
1137 }
1138 } else if (algo == "signed classical sa") {
1139 auto signed_classical_sa_dropping = ClassicalDropping::SignedSAFunctor(*A, threshold, results);
1140
1141 if (aggregationMayCreateDirichlet) {
1142 MueLu_runDroppingFunctors(signed_classical_sa_dropping,
1143 // drop_boundaries,
1144 preserve_diagonals,
1145 mark_singletons_as_boundary);
1146
1147 } else {
1148 MueLu_runDroppingFunctors(signed_classical_sa_dropping,
1149 // drop_boundaries,
1150 preserve_diagonals);
1151 }
1152 } else if (algo == "distance laplacian") {
1153 using doubleMultiVector = Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO>;
1154 auto coords = Get<RCP<doubleMultiVector>>(currentLevel, "Coordinates");
1155
1156 auto dist2 = DistanceLaplacian::UnweightedDistanceFunctor(*A, coords);
1157
1158 if (distanceLaplacianAlgoStr == "default") {
1159 auto dist_laplacian_dropping = DistanceLaplacian::DropFunctor(*A, threshold, dist2, results);
1160
1161 if (aggregationMayCreateDirichlet) {
1162 MueLu_runDroppingFunctors(dist_laplacian_dropping,
1163 // drop_boundaries,
1164 preserve_diagonals,
1165 mark_singletons_as_boundary);
1166 } else {
1167 MueLu_runDroppingFunctors(dist_laplacian_dropping,
1168 // drop_boundaries,
1169 preserve_diagonals);
1170 }
1171 } else if (distanceLaplacianAlgoStr == "unscaled cut") {
1172 TEUCHOS_ASSERT(false);
1173 } else if (distanceLaplacianAlgoStr == "scaled cut") {
1174 TEUCHOS_ASSERT(false);
1175 } else if (distanceLaplacianAlgoStr == "scaled cut symmetric") {
1176 TEUCHOS_ASSERT(false);
1177 } else {
1178 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut|scaled cut symmetric), not \"" << distanceLaplacianAlgoStr << "\"");
1179 }
1180 } else {
1181 TEUCHOS_ASSERT(false);
1182 }
1183 } else {
1184 Kokkos::deep_copy(results, KEEP);
1185 // MueLu_runDroppingFunctors(drop_boundaries);
1186 auto no_op = Misc::NoOpFunctor<LocalOrdinal>();
1188 }
1189#undef MueLu_runDroppingFunctors
1190 }
1191 LocalOrdinal nnz_filtered = nnz.first;
1192 LocalOrdinal nnz_graph = nnz.second;
1193 GO numTotal = lclA.nnz();
1194 GO numDropped = numTotal - nnz_filtered;
1195 // We now know the number of entries of filtered A and have the final rowptr.
1196
1198 // Pass 4: Create local matrix for filtered A
1199 //
1200 // Dropped entries are optionally lumped to the diagonal.
1201
1202 RCP<Matrix> filteredA;
1203 RCP<LWGraph_kokkos> graph;
1204 {
1205 SubFactoryMonitor mFill(*this, "Filtered matrix fill", currentLevel);
1206
1207 local_matrix_type lclFilteredA;
1208 if (reuseGraph) {
1209 lclFilteredA = local_matrix_type("filteredA", lclA.graph, lclA.numCols());
1210 } else {
1211 auto colidx = entries_type("entries", nnz_filtered);
1212 auto values = values_type("values", nnz_filtered);
1213 lclFilteredA = local_matrix_type("filteredA",
1214 lclA.numRows(), lclA.numCols(),
1215 nnz_filtered,
1216 values, filtered_rowptr, colidx);
1217 }
1218
1219 local_graph_type lclGraph;
1220 {
1221 auto colidx = entries_type("entries", nnz_graph);
1222 lclGraph = local_graph_type(colidx, graph_rowptr);
1223 }
1224
1225 if (lumping) {
1226 if (reuseGraph) {
1227 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, true>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1228 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_reuse", range, fillFunctor);
1229 } else {
1230 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, true, false>(lclA, blkPartSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1231 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_lumped_noreuse", range, fillFunctor);
1232 }
1233 } else {
1234 if (reuseGraph) {
1235 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, true>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1236 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_reuse", range, fillFunctor);
1237 } else {
1238 auto fillFunctor = MatrixConstruction::VectorFillFunctor<local_matrix_type, false, false>(lclA, blkSize, colTranslation, results, lclFilteredA, lclGraph, filteringDirichletThreshold);
1239 Kokkos::parallel_for("MueLu::CoalesceDrop::Fill_unlumped_noreuse", range, fillFunctor);
1240 }
1241 }
1242
1243 filteredA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclFilteredA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
1244 filteredA->SetFixedBlockSize(blkSize);
1245
1246 if (reuseEigenvalue) {
1247 // Reuse max eigenvalue from A
1248 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
1249 // the D^{-1}A estimate in A, may as well use it.
1250 // NOTE: ML does that too
1251 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
1252 } else {
1253 filteredA->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
1254 }
1255
1256 graph = rcp(new LWGraph_kokkos(lclGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1257 graph->SetBoundaryNodeMap(boundaryNodes);
1258 }
1259
1260 LO dofsPerNode = blkSize;
1261
1262 Set(currentLevel, "DofsPerNode", dofsPerNode);
1263 Set(currentLevel, "Graph", graph);
1264 Set(currentLevel, "A", filteredA);
1265
1266 return std::make_tuple(numDropped, boundaryNodes);
1267}
1268
1269} // namespace MueLu
1270#endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_runBoundaryFunctors(...)
#define MueLu_runDroppingFunctors(...)
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Functor for marking nodes as Dirichlet based on rowsum.
Functor for marking nodes as Dirichlet in a block operator.
Classical smoothed aggregation dropping criterion.
Signed classical Ruge-Stueben dropping criterion.
Signed classical smoothed aggregation dropping criterion.
void DeclareInput(Level &currentLevel) const
Input.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
void Build(Level &currentLevel) const
Build an object with this factory.
typename MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node >::boundary_nodes_type boundary_nodes_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildVector(Level &currentLevel) const
std::tuple< GlobalOrdinal, boundary_nodes_type > BuildScalar(Level &currentLevel) const
std::tuple< RCP< LocalOrdinalVector >, RCP< LocalOrdinalVector > > GetBlockNumberMVs(Level &currentLevel) const
Order each row by a criterion, compare the ratio of values and drop all entries once the ratio is bel...
Orders entries of row by .
Orders entries of row by where is the distance Laplacian.
Orders entries of row by .
Drops entries the unscaled distance Laplacian.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
Functor does not reuse the graph of the matrix for a problem with blockSize == 1.
Functor that fills the filtered matrix while reusing the graph of the matrix before dropping,...
Functor that drops all entries that are not on the block diagonal.
Functor that marks diagonal as kept, unless the are already marked as boundary.
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
Functor that marks singletons (all off-diagonal entries in a row are dropped) as boundary.
Functor that drops boundary nodes for a blockSize == 1 problem.
Functor that symmetrizes the dropping decisions.
Functor that drops boundary nodes for a blockSize > 1 problem.
virtual const Teuchos::ParameterList & GetParameterList() const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
VerbLevel GetVerbLevel() const
Get the verbosity level.
Namespace for MueLu classes and methods.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.