MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
11#define MUELU_COALESCEDROPFACTORY_DEF_HPP
12
13#include <Xpetra_CrsGraphFactory.hpp>
14#include <Xpetra_CrsGraph.hpp>
15#include <Xpetra_ImportFactory.hpp>
16#include <Xpetra_ExportFactory.hpp>
17#include <Xpetra_MapFactory.hpp>
18#include <Xpetra_Map.hpp>
19#include <Xpetra_Matrix.hpp>
20#include <Xpetra_MultiVectorFactory.hpp>
21#include <Xpetra_MultiVector.hpp>
22#include <Xpetra_StridedMap.hpp>
23#include <Xpetra_VectorFactory.hpp>
24#include <Xpetra_Vector.hpp>
25
26#include <Xpetra_IO.hpp>
27
28#include <Kokkos_NestedSort.hpp>
29#include <Kokkos_StdAlgorithms.hpp>
31
32#include "MueLu_AmalgamationFactory.hpp"
33#include "MueLu_AmalgamationInfo.hpp"
34#include "MueLu_Exceptions.hpp"
35#include "MueLu_LWGraph.hpp"
36
37#include "MueLu_Level.hpp"
38#include "MueLu_LWGraph.hpp"
39#include "MueLu_MasterList.hpp"
40#include "MueLu_Monitor.hpp"
41#include "MueLu_PreDropFunctionConstVal.hpp"
42#include "MueLu_Utilities.hpp"
43
44#ifdef HAVE_XPETRA_TPETRA
45#include "Tpetra_CrsGraphTransposer.hpp"
46#endif
47
48#include <algorithm>
49#include <cstdlib>
50#include <string>
51
52// If defined, read environment variables.
53// Should be removed once we are confident that this works.
54//#define DJS_READ_ENV_VARIABLES
55
56namespace MueLu {
57
58namespace Details {
59template <class real_type, class LO>
60struct DropTol {
61 DropTol() = default;
62 DropTol(DropTol const&) = default;
63 DropTol(DropTol&&) = default;
64
65 DropTol& operator=(DropTol const&) = default;
66 DropTol& operator=(DropTol&&) = default;
67
68 DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
69 : val{val_}
70 , diag{diag_}
71 , col{col_}
72 , drop{drop_} {}
73
74 real_type val{Teuchos::ScalarTraits<real_type>::zero()};
75 real_type diag{Teuchos::ScalarTraits<real_type>::zero()};
76 LO col{Teuchos::OrdinalTraits<LO>::invalid()};
77 bool drop{true};
78
79 // CMS: Auxillary information for debugging info
80 // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
81};
82} // namespace Details
83
88
89template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
91 RCP<ParameterList> validParamList = rcp(new ParameterList());
92
93#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
94 SET_VALID_ENTRY("aggregation: drop tol");
95 SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
96 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
97 SET_VALID_ENTRY("aggregation: greedy Dirichlet");
98 SET_VALID_ENTRY("aggregation: row sum drop tol");
99 SET_VALID_ENTRY("aggregation: drop scheme");
100 SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
101 SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
102 SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
103
104 {
105 // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
106 validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
107 }
108 SET_VALID_ENTRY("aggregation: distance laplacian algo");
109 SET_VALID_ENTRY("aggregation: classical algo");
110 SET_VALID_ENTRY("aggregation: coloring: localize color graph");
111#undef SET_VALID_ENTRY
112 validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
113
114 validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
115 validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
116 validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
117 validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
118
119 return validParamList;
120}
121
122template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
125
126template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
128 Input(currentLevel, "A");
129 Input(currentLevel, "UnAmalgamationInfo");
130
131 const ParameterList& pL = GetParameterList();
132 if (pL.get<bool>("lightweight wrap") == true) {
133 std::string algo = pL.get<std::string>("aggregation: drop scheme");
134 if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
135 Input(currentLevel, "Coordinates");
136 }
137 if (algo == "signed classical sa")
138 ;
139 else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
140 Input(currentLevel, "BlockNumber");
141 }
142 }
143}
144
145template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
147 FactoryMonitor m(*this, "Build", currentLevel);
148
149 typedef Teuchos::ScalarTraits<SC> STS;
150 typedef typename STS::magnitudeType real_type;
151 typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
152 typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
153
154 if (predrop_ != Teuchos::null)
155 GetOStream(Parameters0) << predrop_->description();
156
157 RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel, "A");
158 RCP<AmalgamationInfo> amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
159 const ParameterList& pL = GetParameterList();
160 bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
161
162 GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
163 std::string algo = pL.get<std::string>("aggregation: drop scheme");
164 const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
165
166 RCP<RealValuedMultiVector> Coords;
167 RCP<Matrix> A;
168
169 bool use_block_algorithm = false;
170 LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
171 bool useSignedClassicalRS = false;
172 bool useSignedClassicalSA = false;
173 bool generateColoringGraph = false;
174
175 // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
176 // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
177 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
178
179 RCP<LocalOrdinalVector> ghostedBlockNumber;
180
181 if (algo == "distance laplacian") {
182 // Grab the coordinates for distance laplacian
183 Coords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
184 A = realA;
185 } else if (algo == "signed classical sa") {
186 useSignedClassicalSA = true;
187 algo = "classical";
188 A = realA;
189 } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
190 useSignedClassicalRS = true;
191 // if(realA->GetFixedBlockSize() > 1) {
192 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
193 // Ghost the column block numbers if we need to
194 RCP<const Import> importer = realA->getCrsGraph()->getImporter();
195 if (!importer.is_null()) {
196 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
197 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
198 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
199 } else {
200 ghostedBlockNumber = BlockNumber;
201 }
202 // }
203 if (algo == "block diagonal colored signed classical")
204 generateColoringGraph = true;
205 algo = "classical";
206 A = realA;
207
208 } else if (algo == "block diagonal") {
209 // Handle the "block diagonal" filtering and then leave
210 BlockDiagonalize(currentLevel, realA, false);
211 return;
212 } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
213 // Handle the "block diagonal" filtering, and then continue onward
214 use_block_algorithm = true;
215 RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA, true);
216 if (algo == "block diagonal distance laplacian") {
217 // We now need to expand the coordinates by the interleaved blocksize
218 RCP<RealValuedMultiVector> OldCoords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
219 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220 LO dim = (LO)OldCoords->getNumVectors();
221 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222 for (LO k = 0; k < dim; k++) {
223 ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
224 ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
225 for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
226 LO new_base = i * dim;
227 for (LO j = 0; j < interleaved_blocksize; j++)
228 new_vec[new_base + j] = old_vec[i];
229 }
230 }
231 } else {
232 Coords = OldCoords;
233 }
234 algo = "distance laplacian";
235 } else if (algo == "block diagonal classical") {
236 algo = "classical";
237 }
238 // All cases
239 A = filteredMatrix;
240 rowSumTol = -1.0;
241 } else {
242 A = realA;
243 }
244
245 // Distance Laplacian weights
246 Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
247 enum { NO_WEIGHTS = 0,
248 SINGLE_WEIGHTS,
249 BLOCK_WEIGHTS };
250 int use_dlap_weights = NO_WEIGHTS;
251 if (algo == "distance laplacian") {
252 LO dim = (LO)Coords->getNumVectors();
253 // If anything isn't 1.0 we need to turn on the weighting
254 bool non_unity = false;
255 for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
256 if (dlap_weights[i] != 1.0) {
257 non_unity = true;
258 }
259 }
260 if (non_unity) {
261 LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
262 if ((LO)dlap_weights.size() == dim)
263 use_dlap_weights = SINGLE_WEIGHTS;
264 else if ((LO)dlap_weights.size() == blocksize * dim)
265 use_dlap_weights = BLOCK_WEIGHTS;
266 else {
267 TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
268 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
269 }
271 GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
272 }
273 }
274
275 // decide wether to use the fast-track code path for standard maps or the somewhat slower
276 // code path for non-standard maps
277 /*bool bNonStandardMaps = false;
278 if (A->IsView("stridedMaps") == true) {
279 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
280 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
281 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
282 if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
283 bNonStandardMaps = true;
284 }*/
285
286 if (doExperimentalWrap) {
287 TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
288 TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
289
290 SC threshold;
291 // If we're doing the ML-style halving of the drop tol at each level, we do that here.
292 if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
293 threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
294 else
295 threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
296
297 std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
298 std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
299 real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime
300
302 // Remove this bit once we are confident that cut-based dropping works.
303#ifdef HAVE_MUELU_DEBUG
304 int distanceLaplacianCutVerbose = 0;
305#endif
306#ifdef DJS_READ_ENV_VARIABLES
307 if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
308 distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
309 }
310
311 if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
312 auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
313 realThreshold = 1e-4 * tmp;
314 }
315
316#ifdef HAVE_MUELU_DEBUG
317 if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
318 distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
319 }
320#endif
321#endif
323
324 decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
325 decisionAlgoType classicalAlgo = defaultAlgo;
326 if (algo == "distance laplacian") {
327 if (distanceLaplacianAlgoStr == "default")
328 distanceLaplacianAlgo = defaultAlgo;
329 else if (distanceLaplacianAlgoStr == "unscaled cut")
330 distanceLaplacianAlgo = unscaled_cut;
331 else if (distanceLaplacianAlgoStr == "scaled cut")
332 distanceLaplacianAlgo = scaled_cut;
333 else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
334 distanceLaplacianAlgo = scaled_cut_symmetric;
335 else
336 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
337 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
338 } else if (algo == "classical") {
339 if (classicalAlgoStr == "default")
340 classicalAlgo = defaultAlgo;
341 else if (classicalAlgoStr == "unscaled cut")
342 classicalAlgo = unscaled_cut;
343 else if (classicalAlgoStr == "scaled cut")
344 classicalAlgo = scaled_cut;
345 else
346 TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
347 GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
348
349 } else
350 GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
351
352 if (((algo == "classical") && (classicalAlgoStr.find("scaled") != std::string::npos)) || ((algo == "distance laplacian") && (distanceLaplacianAlgoStr.find("scaled") != std::string::npos)))
353 TEUCHOS_TEST_FOR_EXCEPTION(realThreshold > 1.0, Exceptions::RuntimeError, "For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold << ", needs to be <= 1.0");
354
355 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
356
357 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
358
359 // NOTE: We don't support signed classical RS or SA with cut drop at present
360 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
361 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
362
363 GO numDropped = 0, numTotal = 0;
364 std::string graphType = "unamalgamated"; // for description purposes only
365
366 /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
367 BlockSize is the number of storage blocks that must kept together during the amalgamation process.
368
369 Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
370
371 numPDEs = BlockSize * storageblocksize.
372
373 If numPDEs==1
374 Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
375 No other values makes sense.
376
377 If numPDEs>1
378 If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
379 If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
380 Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
381 */
382 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
383 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
384
385 /************************** RS or SA-style Classical Dropping (and variants) **************************/
386 if (algo == "classical") {
387 if (predrop_ == null) {
388 // ap: this is a hack: had to declare predrop_ as mutable
389 predrop_ = rcp(new PreDropFunctionConstVal(threshold));
390 }
391
392 if (predrop_ != null) {
393 RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
394 TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
395 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
396 // If a user provided a predrop function, it overwrites the XML threshold parameter
397 SC newt = predropConstVal->GetThreshold();
398 if (newt != threshold) {
399 GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
400 threshold = newt;
401 }
402 }
403 // At this points we either have
404 // (predrop_ != null)
405 // Therefore, it is sufficient to check only threshold
406 if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
407 // Case 1: scalar problem, no dropping => just use matrix graph
408 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
409 // Detect and record rows that correspond to Dirichlet boundary conditions
410 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
411 if (rowSumTol > 0.)
412 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
413
414 graph->SetBoundaryNodeMap(boundaryNodes);
415 numTotal = A->getLocalNumEntries();
416
417 if (GetVerbLevel() & Statistics1) {
418 GO numLocalBoundaryNodes = 0;
419 GO numGlobalBoundaryNodes = 0;
420 for (size_t i = 0; i < boundaryNodes.size(); ++i)
421 if (boundaryNodes[i])
422 numLocalBoundaryNodes++;
423 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
424 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
425 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
426 }
427
428 Set(currentLevel, "DofsPerNode", 1);
429 Set(currentLevel, "Graph", graph);
430
431 } else if ((BlockSize == 1 && threshold != STS::zero()) ||
432 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
433 (BlockSize == 1 && useSignedClassicalRS) ||
434 (BlockSize == 1 && useSignedClassicalSA)) {
435 // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
436 // graph's map information, e.g., whether index is local
437 // OR a matrix without a CrsGraph
438
439 // allocate space for the local graph
440 typename LWGraph::row_type::non_const_type rows("rows", A->getLocalNumRows() + 1);
441 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
442
443 using MT = typename STS::magnitudeType;
444 RCP<Vector> ghostedDiag;
445 ArrayRCP<const SC> ghostedDiagVals;
446 ArrayRCP<const SC> negMaxOffDiagonal;
447 // RS style needs the max negative off-diagonal, SA style needs the diagonal
448 if (useSignedClassicalRS) {
449 if (ghostedBlockNumber.is_null()) {
451 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
453 GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
454 } else {
455 auto negMaxOffDiagonalVec = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
456 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
458 GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
459 }
460 } else {
462 if (classicalAlgo == defaultAlgo) {
463 ghostedDiagVals = ghostedDiag->getData(0);
464 }
465 }
466 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
467 if (rowSumTol > 0.) {
468 if (ghostedBlockNumber.is_null()) {
470 GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
471 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
472 } else {
474 GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
475 Utilities::ApplyRowSumCriterionHost(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
476 }
477 }
478
479 ArrayRCP<const LO> g_block_id;
480 if (!ghostedBlockNumber.is_null())
481 g_block_id = ghostedBlockNumber->getData(0);
482
483 LO realnnz = 0;
484 rows(0) = 0;
485 if (classicalAlgo == defaultAlgo) {
486 SubFactoryMonitor m1(*this, "Classical RS/SA", currentLevel);
487 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
488 size_t nnz = A->getNumEntriesInLocalRow(row);
489 bool rowIsDirichlet = boundaryNodes[row];
490 ArrayView<const LO> indices;
491 ArrayView<const SC> vals;
492 A->getLocalRowView(row, indices, vals);
493
494 // FIXME the current predrop function uses the following
495 // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
496 // FIXME but the threshold doesn't take into account the rows' diagonal entries
497 // FIXME For now, hardwiring the dropping in here
498
499 LO rownnz = 0;
500 if (useSignedClassicalRS) {
501 // Signed classical RS style
502 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
503 LO col = indices[colID];
504 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
505 MT neg_aij = -STS::real(vals[colID]);
506 /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
507 g_block_id.is_null() ? -1 : g_block_id[row],
508 g_block_id.is_null() ? -1 : g_block_id[col],
509 neg_aij, max_neg_aik);*/
510 if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
511 columns[realnnz++] = col;
512 rownnz++;
513 } else
514 numDropped++;
515 }
516 rows(row + 1) = realnnz;
517 } else if (useSignedClassicalSA) {
518 // Signed classical SA style
519 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
520 LO col = indices[colID];
521
522 bool is_nonpositive = STS::real(vals[colID]) <= 0;
523 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
524 MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
525 /*
526 if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
527 vals[colID],aij, aiiajj);
528 */
529
530 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
531 columns(realnnz++) = col;
532 rownnz++;
533 } else
534 numDropped++;
535 }
536 rows[row + 1] = realnnz;
537 } else {
538 // Standard abs classical
539 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
540 LO col = indices[colID];
541 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
542 MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
543
544 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
545 columns(realnnz++) = col;
546 rownnz++;
547 } else
548 numDropped++;
549 }
550 rows(row + 1) = realnnz;
551 }
552 } // end for row
553 } else {
554 /* Cut Algorithm */
555 SubFactoryMonitor m1(*this, "Cut Drop", currentLevel);
556 using ExecSpace = typename Node::execution_space;
557 using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
558 using TeamMem = typename TeamPol::member_type;
559 using ATS = Kokkos::ArithTraits<Scalar>;
560 using impl_scalar_type = typename ATS::val_type;
561 using implATS = Kokkos::ArithTraits<impl_scalar_type>;
562
563 // move from host to device
564 auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
565 auto thresholdKokkos = static_cast<impl_scalar_type>(threshold);
566 auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
567 auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
568
569 auto A_device = A->getLocalMatrixDevice();
570 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
571 RCP<const Import> importer = A->getCrsGraph()->getImporter();
572 RCP<LocalOrdinalVector> boundaryNodesVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetDomainMap());
573 RCP<LocalOrdinalVector> boundaryColumnVector;
574 for (size_t i = 0; i < graph->GetNodeNumVertices(); i++) {
575 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
576 }
577 if (!importer.is_null()) {
578 boundaryColumnVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetImportMap());
579 boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
580 } else {
581 boundaryColumnVector = boundaryNodesVector;
582 }
583 auto boundaryColumn = boundaryColumnVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
584 auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
585
586 Kokkos::View<LO*, ExecSpace> rownnzView("rownnzView", A_device.numRows());
587 auto drop_views = Kokkos::View<bool*, ExecSpace>("drop_views", A_device.nnz());
588 auto index_views = Kokkos::View<size_t*, ExecSpace>("index_views", A_device.nnz());
589
590 Kokkos::parallel_reduce(
591 "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(const TeamMem& teamMember, LO& globalnnz, GO& totalDropped) {
592 LO row = teamMember.league_rank();
593 auto rowView = A_device.rowConst(row);
594 size_t nnz = rowView.length;
595
596 auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
597 auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
598
599 // find magnitudes
600 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID) {
601 index_view(colID) = colID;
602 LO col = rowView.colidx(colID);
603 // ignore diagonals for now, they are checked again later
604 // Don't aggregate boundaries
605 if (row == col || boundary(col)) {
606 drop_view(colID) = true;
607 } else {
608 drop_view(colID) = false;
609 }
610 });
611
612 size_t dropStart = nnz;
613 if (classicalAlgo == unscaled_cut) {
614 // push diagonals and boundaries to the right, sort everything else by aij on the left
615 Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
616 if (drop_view(x) || drop_view(y)) {
617 return drop_view(x) < drop_view(y);
618 } else {
619 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
620 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
621 return x_aij > y_aij;
622 }
623 });
624
625 // find index where dropping starts
626 Kokkos::parallel_reduce(
627 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
628 auto const& x = index_view(i - 1);
629 auto const& y = index_view(i);
630 typename implATS::magnitudeType x_aij = 0;
631 typename implATS::magnitudeType y_aij = 0;
632 if (!drop_view(x)) {
633 x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
634 }
635 if (!drop_view(y)) {
636 y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
637 }
638
639 if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
640 if (i < min) {
641 min = i;
642 }
643 }
644 },
645 Kokkos::Min<size_t>(dropStart));
646 } else if (classicalAlgo == scaled_cut) {
647 // push diagonals and boundaries to the right, sort everything else by aij/aiiajj on the left
648 Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool {
649 if (drop_view(x) || drop_view(y)) {
650 return drop_view(x) < drop_view(y);
651 } else {
652 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
653 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
654 auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
655 auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
656 return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
657 }
658 });
659
660 // find index where dropping starts
661 Kokkos::parallel_reduce(
662 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) {
663 auto const& x = index_view(i - 1);
664 auto const& y = index_view(i);
665 typename implATS::magnitudeType x_val = 0;
666 typename implATS::magnitudeType y_val = 0;
667 if (!drop_view(x)) {
668 typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
669 typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
670 x_val = x_aij / x_aiiajj;
671 }
672 if (!drop_view(y)) {
673 typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
674 typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
675 y_val = y_aij / y_aiiajj;
676 }
677
678 if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
679 if (i < min) {
680 min = i;
681 }
682 }
683 },
684 Kokkos::Min<size_t>(dropStart));
685 }
686
687 // drop everything to the right of where values stop passing threshold
688 if (dropStart < nnz) {
689 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](size_t i) {
690 drop_view(index_view(i)) = true;
691 });
692 }
693
694 LO rownnz = 0;
695 GO rowDropped = 0;
696 Kokkos::parallel_reduce(
697 Kokkos::TeamThreadRange(teamMember, nnz), [=](const size_t idxID, LO& keep, GO& drop) {
698 LO col = rowView.colidx(idxID);
699 // don't drop diagonal
700 if (row == col || !drop_view(idxID)) {
701 columnsDevice(A_device.graph.row_map(row) + idxID) = col;
702 keep++;
703 } else {
704 columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
705 drop++;
706 }
707 },
708 rownnz, rowDropped);
709
710 globalnnz += rownnz;
711 totalDropped += rowDropped;
712 rownnzView(row) = rownnz;
713 },
714 realnnz, numDropped);
715
716 // update column indices so that kept indices are aligned to the left for subview that happens later on
717 Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
718 Kokkos::deep_copy(columns, columnsDevice);
719
720 // update row indices by adding up new # of nnz in each row
721 auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(), rows);
722 Kokkos::parallel_scan(
723 Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(const int i, LO& partial_sum, bool is_final) {
724 partial_sum += rownnzView(i);
725 if (is_final) rowsDevice(i + 1) = partial_sum;
726 });
727 Kokkos::deep_copy(rows, rowsDevice);
728 }
729
730 numTotal = A->getLocalNumEntries();
731
732 if (aggregationMayCreateDirichlet) {
733 // If the only element remaining after filtering is diagonal, mark node as boundary
734 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
735 if (rows[row + 1] - rows[row] <= 1)
736 boundaryNodes[row] = true;
737 }
738 }
739
740 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "thresholded graph of A"));
741 graph->SetBoundaryNodeMap(boundaryNodes);
742 if (GetVerbLevel() & Statistics1) {
743 GO numLocalBoundaryNodes = 0;
744 GO numGlobalBoundaryNodes = 0;
745 for (size_t i = 0; i < boundaryNodes.size(); ++i)
746 if (boundaryNodes(i))
747 numLocalBoundaryNodes++;
748 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
749 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
750 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
751 }
752 Set(currentLevel, "Graph", graph);
753 Set(currentLevel, "DofsPerNode", 1);
754
755 // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
756 if (generateColoringGraph) {
757 RCP<LWGraph> colorGraph;
758 RCP<const Import> importer = A->getCrsGraph()->getImporter();
759 BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
760 Set(currentLevel, "Coloring Graph", colorGraph);
761 // #define CMS_DUMP
762#ifdef CMS_DUMP
763 {
764 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
765 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
766 // int rank = graph->GetDomainMap()->getComm()->getRank();
767 // {
768 // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
769 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
770 // colorGraph->print(*fancy,Debug);
771 // }
772 // {
773 // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
774 // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
775 // graph->print(*fancy,Debug);
776 // }
777 }
778#endif
779 } // end generateColoringGraph
780 } else if (BlockSize > 1 && threshold == STS::zero()) {
781 // Case 3: Multiple DOF/node problem without dropping
782 const RCP<const Map> rowMap = A->getRowMap();
783 const RCP<const Map> colMap = A->getColMap();
784
785 graphType = "amalgamated";
786
787 // build node row map (uniqueMap) and node column map (nonUniqueMap)
788 // the arrays rowTranslation and colTranslation contain the local node id
789 // given a local dof id. The data is calculated by the AmalgamationFactory and
790 // stored in the variable container "UnAmalgamationInfo"
791 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
792 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
793 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
794 Array<LO> colTranslation = *(amalInfo->getColTranslation());
795
796 // get number of local nodes
797 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
798
799 // Allocate space for the local graph
800 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
801 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
802
803 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
804 Kokkos::deep_copy(amalgBoundaryNodes, false);
805
806 // Detect and record rows that correspond to Dirichlet boundary conditions
807 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
808 // TODO the array one bigger than the number of local rows, and the last entry can
809 // TODO hold the actual number of boundary nodes. Clever, huh?
810 ArrayRCP<bool> pointBoundaryNodes;
811 pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows(*A, dirichletThreshold));
812 if (rowSumTol > 0.)
813 Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
814
815 // extract striding information
816 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
817 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
818 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
819 if (A->IsView("stridedMaps") == true) {
820 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
821 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
822 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
823 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
824 blkId = strMap->getStridedBlockId();
825 if (blkId > -1)
826 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
827 }
828
829 // loop over all local nodes
830 LO realnnz = 0;
831 rows(0) = 0;
832 Array<LO> indicesExtra;
833 for (LO row = 0; row < numRows; row++) {
834 ArrayView<const LO> indices;
835 indicesExtra.resize(0);
836
837 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
838 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
839 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
840 // with local ids.
841 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
842 // node.
843 bool isBoundary = false;
844 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
845 for (LO j = 0; j < blkPartSize; j++) {
846 if (pointBoundaryNodes[row * blkPartSize + j]) {
847 isBoundary = true;
848 break;
849 }
850 }
851 } else {
852 isBoundary = true;
853 for (LO j = 0; j < blkPartSize; j++) {
854 if (!pointBoundaryNodes[row * blkPartSize + j]) {
855 isBoundary = false;
856 break;
857 }
858 }
859 }
860
861 // Merge rows of A
862 // The array indicesExtra contains local column node ids for the current local node "row"
863 if (!isBoundary)
864 MergeRows(*A, row, indicesExtra, colTranslation);
865 else
866 indicesExtra.push_back(row);
867 indices = indicesExtra;
868 numTotal += indices.size();
869
870 // add the local column node ids to the full columns array which
871 // contains the local column node ids for all local node rows
872 LO nnz = indices.size(), rownnz = 0;
873 for (LO colID = 0; colID < nnz; colID++) {
874 LO col = indices[colID];
875 columns(realnnz++) = col;
876 rownnz++;
877 }
878
879 if (rownnz == 1) {
880 // If the only element remaining after filtering is diagonal, mark node as boundary
881 // FIXME: this should really be replaced by the following
882 // if (indices.size() == 1 && indices[0] == row)
883 // boundaryNodes[row] = true;
884 // We do not do it this way now because there is no framework for distinguishing isolated
885 // and boundary nodes in the aggregation algorithms
886 amalgBoundaryNodes[row] = true;
887 }
888 rows(row + 1) = realnnz;
889 } // for (LO row = 0; row < numRows; row++)
890
891 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
892 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
893
894 if (GetVerbLevel() & Statistics1) {
895 GO numLocalBoundaryNodes = 0;
896 GO numGlobalBoundaryNodes = 0;
897
898 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
899 if (amalgBoundaryNodes(i))
900 numLocalBoundaryNodes++;
901
902 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
903 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
904 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
905 << " agglomerated Dirichlet nodes" << std::endl;
906 }
907
908 Set(currentLevel, "Graph", graph);
909 Set(currentLevel, "DofsPerNode", blkSize); // full block size
910
911 } else if (BlockSize > 1 && threshold != STS::zero()) {
912 // Case 4: Multiple DOF/node problem with dropping
913 const RCP<const Map> rowMap = A->getRowMap();
914 const RCP<const Map> colMap = A->getColMap();
915 graphType = "amalgamated";
916
917 // build node row map (uniqueMap) and node column map (nonUniqueMap)
918 // the arrays rowTranslation and colTranslation contain the local node id
919 // given a local dof id. The data is calculated by the AmalgamationFactory and
920 // stored in the variable container "UnAmalgamationInfo"
921 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
922 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
923 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
924 Array<LO> colTranslation = *(amalInfo->getColTranslation());
925
926 // get number of local nodes
927 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
928
929 // Allocate space for the local graph
930 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
931 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
932
933 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
934 Kokkos::deep_copy(amalgBoundaryNodes, false);
935
936 // Detect and record rows that correspond to Dirichlet boundary conditions
937 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
938 // TODO the array one bigger than the number of local rows, and the last entry can
939 // TODO hold the actual number of boundary nodes. Clever, huh?
940 auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
941 if (rowSumTol > 0.)
942 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
943
944 // extract striding information
945 LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
946 LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
947 LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
948 if (A->IsView("stridedMaps") == true) {
949 Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
950 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
951 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
952 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
953 blkId = strMap->getStridedBlockId();
954 if (blkId > -1)
955 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
956 }
957
958 // extract diagonal data for dropping strategy
960 const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
961
962 // loop over all local nodes
963 LO realnnz = 0;
964 rows[0] = 0;
965 Array<LO> indicesExtra;
966 for (LO row = 0; row < numRows; row++) {
967 ArrayView<const LO> indices;
968 indicesExtra.resize(0);
969
970 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
971 // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
972 // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
973 // with local ids.
974 // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
975 // node.
976 bool isBoundary = false;
977 if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
978 for (LO j = 0; j < blkPartSize; j++) {
979 if (pointBoundaryNodes[row * blkPartSize + j]) {
980 isBoundary = true;
981 break;
982 }
983 }
984 } else {
985 isBoundary = true;
986 for (LO j = 0; j < blkPartSize; j++) {
987 if (!pointBoundaryNodes[row * blkPartSize + j]) {
988 isBoundary = false;
989 break;
990 }
991 }
992 }
993
994 // Merge rows of A
995 // The array indicesExtra contains local column node ids for the current local node "row"
996 if (!isBoundary)
997 MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
998 else
999 indicesExtra.push_back(row);
1000 indices = indicesExtra;
1001 numTotal += indices.size();
1002
1003 // add the local column node ids to the full columns array which
1004 // contains the local column node ids for all local node rows
1005 LO nnz = indices.size(), rownnz = 0;
1006 for (LO colID = 0; colID < nnz; colID++) {
1007 LO col = indices[colID];
1008 columns[realnnz++] = col;
1009 rownnz++;
1010 }
1011
1012 if (rownnz == 1) {
1013 // If the only element remaining after filtering is diagonal, mark node as boundary
1014 // FIXME: this should really be replaced by the following
1015 // if (indices.size() == 1 && indices[0] == row)
1016 // boundaryNodes[row] = true;
1017 // We do not do it this way now because there is no framework for distinguishing isolated
1018 // and boundary nodes in the aggregation algorithms
1019 amalgBoundaryNodes[row] = true;
1020 }
1021 rows[row + 1] = realnnz;
1022 } // for (LO row = 0; row < numRows; row++)
1023 // columns.resize(realnnz);
1024
1025 RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1026 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1027
1028 if (GetVerbLevel() & Statistics1) {
1029 GO numLocalBoundaryNodes = 0;
1030 GO numGlobalBoundaryNodes = 0;
1031
1032 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1033 if (amalgBoundaryNodes(i))
1034 numLocalBoundaryNodes++;
1035
1036 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1037 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1038 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
1039 << " agglomerated Dirichlet nodes" << std::endl;
1040 }
1041
1042 Set(currentLevel, "Graph", graph);
1043 Set(currentLevel, "DofsPerNode", blkSize); // full block size
1044 }
1045
1046 } else if (algo == "distance laplacian") {
1047 LO blkSize = A->GetFixedBlockSize();
1048 GO indexBase = A->getRowMap()->getIndexBase();
1049 // [*0*] : FIXME
1050 // ap: somehow, if I move this line to [*1*], Belos throws an error
1051 // I'm not sure what's going on. Do we always have to Get data, if we did
1052 // DeclareInput for it?
1053 // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1054
1055 // Detect and record rows that correspond to Dirichlet boundary conditions
1056 // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1057 // TODO the array one bigger than the number of local rows, and the last entry can
1058 // TODO hold the actual number of boundary nodes. Clever, huh?
1059 auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1060 if (rowSumTol > 0.)
1061 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
1062
1063 if ((blkSize == 1) && (threshold == STS::zero())) {
1064 // Trivial case: scalar problem, no dropping. Can return original graph
1065 RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
1066 graph->SetBoundaryNodeMap(pointBoundaryNodes);
1067 graphType = "unamalgamated";
1068 numTotal = A->getLocalNumEntries();
1069
1070 if (GetVerbLevel() & Statistics1) {
1071 GO numLocalBoundaryNodes = 0;
1072 GO numGlobalBoundaryNodes = 0;
1073 for (size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1074 if (pointBoundaryNodes(i))
1075 numLocalBoundaryNodes++;
1076 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1077 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1078 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1079 }
1080
1081 Set(currentLevel, "DofsPerNode", blkSize);
1082 Set(currentLevel, "Graph", graph);
1083
1084 } else {
1085 // ap: We make quite a few assumptions here; general case may be a lot different,
1086 // but much much harder to implement. We assume that:
1087 // 1) all maps are standard maps, not strided maps
1088 // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1089 // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1090 //
1091 // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1092 // but as I totally don't understand that code, here is my solution
1093
1094 // [*1*]: see [*0*]
1095
1096 // Check that the number of local coordinates is consistent with the #rows in A
1097 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1098 "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ").");
1099
1100 const RCP<const Map> colMap = A->getColMap();
1101 RCP<const Map> uniqueMap, nonUniqueMap;
1102 Array<LO> colTranslation;
1103 if (blkSize == 1) {
1104 uniqueMap = A->getRowMap();
1105 nonUniqueMap = A->getColMap();
1106 graphType = "unamalgamated";
1107
1108 } else {
1109 uniqueMap = Coords->getMap();
1110 TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1111 "Different index bases for matrix and coordinates");
1112
1113 AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1114
1115 graphType = "amalgamated";
1116 }
1117 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1118
1119 RCP<RealValuedMultiVector> ghostedCoords;
1120 RCP<Vector> ghostedLaplDiag;
1121 Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1122 if (threshold != STS::zero()) {
1123 // Get ghost coordinates
1124 RCP<const Import> importer;
1125 {
1126 SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1127 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1128 GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1129 importer = realA->getCrsGraph()->getImporter();
1130 } else {
1131 GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1132 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1133 }
1134 } // subtimer
1135 ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1136 {
1137 SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1138 ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1139 } // subtimer
1140
1141 // Construct Distance Laplacian diagonal
1142 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1143 Array<LO> indicesExtra;
1144 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1145 if (threshold != STS::zero()) {
1146 const size_t numVectors = ghostedCoords->getNumVectors();
1147 coordData.reserve(numVectors);
1148 for (size_t j = 0; j < numVectors; j++) {
1149 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1150 coordData.push_back(tmpData);
1151 }
1152 }
1153 {
1154 SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1155 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1156 for (LO row = 0; row < numRows; row++) {
1157 ArrayView<const LO> indices;
1158
1159 if (blkSize == 1) {
1160 ArrayView<const SC> vals;
1161 A->getLocalRowView(row, indices, vals);
1162
1163 } else {
1164 // Merge rows of A
1165 indicesExtra.resize(0);
1166 MergeRows(*A, row, indicesExtra, colTranslation);
1167 indices = indicesExtra;
1168 }
1169
1170 LO nnz = indices.size();
1171 bool haveAddedToDiag = false;
1172 for (LO colID = 0; colID < nnz; colID++) {
1173 const LO col = indices[colID];
1174
1175 if (row != col) {
1176 if (use_dlap_weights == SINGLE_WEIGHTS) {
1177 /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1178 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1179 MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1180 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1181 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1182 int block_id = row % interleaved_blocksize;
1183 int block_start = block_id * interleaved_blocksize;
1184 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1185 } else {
1186 // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1187 localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1188 }
1189 haveAddedToDiag = true;
1190 }
1191 }
1192 // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1193 // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1194 if (!haveAddedToDiag)
1195 localLaplDiagData[row] = STS::squareroot(STS::rmax());
1196 }
1197 } // subtimer
1198 {
1199 SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1200 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1201 ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1202 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1203 } // subtimer
1204
1205 } else {
1206 GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1207 }
1208
1209 // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1210
1211 // allocate space for the local graph
1212 typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
1213 typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
1214
1215#ifdef HAVE_MUELU_DEBUG
1216 // DEBUGGING
1217 for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1218#endif
1219
1220 // Extra array for if we're allowing symmetrization with cutting
1221 ArrayRCP<LO> rows_stop;
1222 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1223 if (use_stop_array)
1224 // rows_stop = typename LWGraph::row_type::non_const_type("rows_stop", numRows);
1225 rows_stop.resize(numRows);
1226
1227 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
1228 Kokkos::deep_copy(amalgBoundaryNodes, false);
1229
1230 LO realnnz = 0;
1231 rows(0) = 0;
1232
1233 Array<LO> indicesExtra;
1234 {
1235 SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1236 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1237 if (threshold != STS::zero()) {
1238 const size_t numVectors = ghostedCoords->getNumVectors();
1239 coordData.reserve(numVectors);
1240 for (size_t j = 0; j < numVectors; j++) {
1241 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1242 coordData.push_back(tmpData);
1243 }
1244 }
1245
1246 ArrayView<const SC> vals; // CMS hackery
1247 for (LO row = 0; row < numRows; row++) {
1248 ArrayView<const LO> indices;
1249 indicesExtra.resize(0);
1250 bool isBoundary = false;
1251
1252 if (blkSize == 1) {
1253 // ArrayView<const SC> vals;//CMS uncomment
1254 A->getLocalRowView(row, indices, vals);
1255 isBoundary = pointBoundaryNodes[row];
1256 } else {
1257 // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1258 isBoundary = true;
1259 for (LO j = 0; j < blkSize; j++) {
1260 if (!pointBoundaryNodes[row * blkSize + j]) {
1261 isBoundary = false;
1262 break;
1263 }
1264 }
1265
1266 // Merge rows of A
1267 if (!isBoundary)
1268 MergeRows(*A, row, indicesExtra, colTranslation);
1269 else
1270 indicesExtra.push_back(row);
1271 indices = indicesExtra;
1272 }
1273 numTotal += indices.size();
1274
1275 LO nnz = indices.size(), rownnz = 0;
1276
1277 if (use_stop_array) {
1278 rows(row + 1) = rows(row) + nnz;
1279 realnnz = rows(row);
1280 }
1281
1282 if (threshold != STS::zero()) {
1283 // default
1284 if (distanceLaplacianAlgo == defaultAlgo) {
1285 /* Standard Distance Laplacian */
1286 for (LO colID = 0; colID < nnz; colID++) {
1287 LO col = indices[colID];
1288
1289 if (row == col) {
1290 columns(realnnz++) = col;
1291 rownnz++;
1292 continue;
1293 }
1294
1295 // We do not want the distance Laplacian aggregating boundary nodes
1296 if (isBoundary) continue;
1297
1298 SC laplVal;
1299 if (use_dlap_weights == SINGLE_WEIGHTS) {
1300 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1301 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1302 int block_id = row % interleaved_blocksize;
1303 int block_start = block_id * interleaved_blocksize;
1304 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1305 } else {
1306 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1307 }
1308 real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1309 real_type aij = STS::magnitude(laplVal * laplVal);
1310
1311 if (aij > aiiajj) {
1312 columns(realnnz++) = col;
1313 rownnz++;
1314 } else {
1315 numDropped++;
1316 }
1317 }
1318 } else {
1319 /* Cut Algorithm */
1320 using DropTol = Details::DropTol<real_type, LO>;
1321 std::vector<DropTol> drop_vec;
1322 drop_vec.reserve(nnz);
1323 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1324 const real_type one = Teuchos::ScalarTraits<real_type>::one();
1325
1326 // find magnitudes
1327 for (LO colID = 0; colID < nnz; colID++) {
1328 LO col = indices[colID];
1329
1330 if (row == col) {
1331 drop_vec.emplace_back(zero, one, colID, false);
1332 continue;
1333 }
1334 // We do not want the distance Laplacian aggregating boundary nodes
1335 if (isBoundary) continue;
1336
1337 SC laplVal;
1338 if (use_dlap_weights == SINGLE_WEIGHTS) {
1339 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1340 } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1341 int block_id = row % interleaved_blocksize;
1342 int block_start = block_id * interleaved_blocksize;
1343 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1344 } else {
1345 laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1346 }
1347
1348 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1349 real_type aij = STS::magnitude(laplVal * laplVal);
1350
1351 drop_vec.emplace_back(aij, aiiajj, colID, false);
1352 }
1353
1354 const size_t n = drop_vec.size();
1355
1356 if (distanceLaplacianAlgo == unscaled_cut) {
1357 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1358 return a.val > b.val;
1359 });
1360
1361 bool drop = false;
1362 for (size_t i = 1; i < n; ++i) {
1363 if (!drop) {
1364 auto const& x = drop_vec[i - 1];
1365 auto const& y = drop_vec[i];
1366 auto a = x.val;
1367 auto b = y.val;
1368 if (realThreshold * realThreshold * a > b) {
1369 drop = true;
1370#ifdef HAVE_MUELU_DEBUG
1371 if (distanceLaplacianCutVerbose) {
1372 std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1373 }
1374#endif
1375 }
1376 }
1377 drop_vec[i].drop = drop;
1378 }
1379 } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1380 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1381 return a.val / a.diag > b.val / b.diag;
1382 });
1383
1384 bool drop = false;
1385 for (size_t i = 1; i < n; ++i) {
1386 if (!drop) {
1387 auto const& x = drop_vec[i - 1];
1388 auto const& y = drop_vec[i];
1389 auto a = x.val / x.diag;
1390 auto b = y.val / y.diag;
1391 if (realThreshold * realThreshold * a > b) {
1392 drop = true;
1393#ifdef HAVE_MUELU_DEBUG
1394 if (distanceLaplacianCutVerbose) {
1395 std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1396 }
1397#endif
1398 }
1399 }
1400 drop_vec[i].drop = drop;
1401 }
1402 }
1403
1404 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1405 return a.col < b.col;
1406 });
1407
1408 for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1409 LO col = indices[drop_vec[idxID].col];
1410
1411 // don't drop diagonal
1412 if (row == col) {
1413 columns(realnnz++) = col;
1414 rownnz++;
1415 // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1416 continue;
1417 }
1418
1419 if (!drop_vec[idxID].drop) {
1420 columns(realnnz++) = col;
1421 // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1422 rownnz++;
1423 } else {
1424 // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1425 numDropped++;
1426 }
1427 }
1428 }
1429 } else {
1430 // Skip laplace calculation and threshold comparison for zero threshold
1431 for (LO colID = 0; colID < nnz; colID++) {
1432 LO col = indices[colID];
1433 columns(realnnz++) = col;
1434 rownnz++;
1435 }
1436 }
1437
1438 if (rownnz == 1) {
1439 // If the only element remaining after filtering is diagonal, mark node as boundary
1440 // FIXME: this should really be replaced by the following
1441 // if (indices.size() == 1 && indices[0] == row)
1442 // boundaryNodes[row] = true;
1443 // We do not do it this way now because there is no framework for distinguishing isolated
1444 // and boundary nodes in the aggregation algorithms
1445 amalgBoundaryNodes[row] = true;
1446 }
1447
1448 if (use_stop_array)
1449 rows_stop[row] = rownnz + rows[row];
1450 else
1451 rows[row + 1] = realnnz;
1452 } // for (LO row = 0; row < numRows; row++)
1453
1454 } // subtimer
1455
1456 if (use_stop_array) {
1457 // Do symmetrization of the cut matrix
1458 // NOTE: We assume nested row/column maps here
1459 for (LO row = 0; row < numRows; row++) {
1460 for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1461 LO col = columns[colidx];
1462 if (col >= numRows) continue;
1463
1464 bool found = false;
1465 for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1466 if (columns[t_col] == row)
1467 found = true;
1468 }
1469 // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1470 // into a Dirichlet unknown. In that case don't.
1471 if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1472 LO new_idx = rows_stop[col];
1473 // printf("(%d,%d) SYMADD entry\n",col,row);
1474 columns[new_idx] = row;
1475 rows_stop[col]++;
1476 numDropped--;
1477 }
1478 }
1479 }
1480
1481 // Condense everything down
1482 LO current_start = 0;
1483 for (LO row = 0; row < numRows; row++) {
1484 LO old_start = current_start;
1485 for (LO col = rows(row); col < rows_stop[row]; col++) {
1486 if (current_start != col) {
1487 columns(current_start) = columns(col);
1488 }
1489 current_start++;
1490 }
1491 rows[row] = old_start;
1492 }
1493 rows(numRows) = realnnz = current_start;
1494 }
1495
1496 RCP<LWGraph> graph;
1497 {
1498 SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1499 graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1500 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1501 } // subtimer
1502
1503 if (GetVerbLevel() & Statistics1) {
1504 GO numLocalBoundaryNodes = 0;
1505 GO numGlobalBoundaryNodes = 0;
1506
1507 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1508 if (amalgBoundaryNodes(i))
1509 numLocalBoundaryNodes++;
1510
1511 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1512 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1513 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1514 << " using threshold " << dirichletThreshold << std::endl;
1515 }
1516
1517 Set(currentLevel, "Graph", graph);
1518 Set(currentLevel, "DofsPerNode", blkSize);
1519 }
1520 }
1521
1522 if (GetVerbLevel() & Statistics1) {
1523 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1524 GO numGlobalTotal, numGlobalDropped;
1525 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1526 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1527 GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1528 if (numGlobalTotal != 0)
1529 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1530 GetOStream(Statistics1) << std::endl;
1531 }
1532
1533 } else {
1534 // what Tobias has implemented
1535
1536 SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1537 // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1538 GetOStream(Runtime0) << "algorithm = \""
1539 << "failsafe"
1540 << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1541 Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1542
1543 RCP<const Map> rowMap = A->getRowMap();
1544 RCP<const Map> colMap = A->getColMap();
1545
1546 LO blockdim = 1; // block dim for fixed size blocks
1547 GO indexBase = rowMap->getIndexBase(); // index base of maps
1548 GO offset = 0;
1549
1550 // 1) check for blocking/striding information
1551 if (A->IsView("stridedMaps") &&
1552 Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1553 Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1554 RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1555 TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1556 blockdim = strMap->getFixedBlockSize();
1557 offset = strMap->getOffset();
1558 oldView = A->SwitchToView(oldView);
1559 GetOStream(Statistics1) << "CoalesceDropFactory::Build():"
1560 << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1561 } else
1562 GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1563
1564 // 2) get row map for amalgamated matrix (graph of A)
1565 // with same distribution over all procs as row map of A
1566 RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1567 GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1568
1569 // 3) create graph of amalgamated matrix
1570 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1571
1572 LO numRows = A->getRowMap()->getLocalNumElements();
1573 LO numNodes = nodeMap->getLocalNumElements();
1574 typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numNodes);
1575 Kokkos::deep_copy(amalgBoundaryNodes, false);
1576 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1577 bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1578
1579 // 4) do amalgamation. generate graph of amalgamated matrix
1580 // Note, this code is much more inefficient than the leightwight implementation
1581 // Most of the work has already been done in the AmalgamationFactory
1582 for (LO row = 0; row < numRows; row++) {
1583 // get global DOF id
1584 GO grid = rowMap->getGlobalElement(row);
1585
1586 // reinitialize boolean helper variable
1587 bIsDiagonalEntry = false;
1588
1589 // translate grid to nodeid
1590 GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1591
1592 size_t nnz = A->getNumEntriesInLocalRow(row);
1593 Teuchos::ArrayView<const LO> indices;
1594 Teuchos::ArrayView<const SC> vals;
1595 A->getLocalRowView(row, indices, vals);
1596
1597 RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1598 LO realnnz = 0;
1599 for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1600 GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1601
1602 if (vals[col] != STS::zero()) {
1603 GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1604 cnodeIds->push_back(cnodeId);
1605 realnnz++; // increment number of nnz in matrix row
1606 if (grid == gcid) bIsDiagonalEntry = true;
1607 }
1608 }
1609
1610 if (realnnz == 1 && bIsDiagonalEntry == true) {
1611 LO lNodeId = nodeMap->getLocalElement(nodeId);
1612 numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1613 if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1614 amalgBoundaryNodes[lNodeId] = true;
1615 }
1616
1617 Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1618
1619 if (arr_cnodeIds.size() > 0)
1620 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1621 }
1622 // fill matrix graph
1623 crsGraph->fillComplete(nodeMap, nodeMap);
1624
1625 // 5) create MueLu Graph object
1626 RCP<LWGraph> graph = rcp(new LWGraph(crsGraph, "amalgamated graph of A"));
1627
1628 // Detect and record rows that correspond to Dirichlet boundary conditions
1629 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1630
1631 if (GetVerbLevel() & Statistics1) {
1632 GO numLocalBoundaryNodes = 0;
1633 GO numGlobalBoundaryNodes = 0;
1634 for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1635 if (amalgBoundaryNodes(i))
1636 numLocalBoundaryNodes++;
1637 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1638 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1639 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1640 }
1641
1642 // 6) store results in Level
1643 // graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1644 Set(currentLevel, "DofsPerNode", blockdim);
1645 Set(currentLevel, "Graph", graph);
1646
1647 } // if (doExperimentalWrap) ... else ...
1648
1649} // Build
1650
1651template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1652void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1653 typedef typename ArrayView<const LO>::size_type size_type;
1654
1655 // extract striding information
1656 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1657 if (A.IsView("stridedMaps") == true) {
1658 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1659 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1660 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1661 if (strMap->getStridedBlockId() > -1)
1662 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1663 }
1664
1665 // count nonzero entries in all dof rows associated with node row
1666 size_t nnz = 0, pos = 0;
1667 for (LO j = 0; j < blkSize; j++)
1668 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1669
1670 if (nnz == 0) {
1671 cols.resize(0);
1672 return;
1673 }
1674
1675 cols.resize(nnz);
1676
1677 // loop over all local dof rows associated with local node "row"
1678 ArrayView<const LO> inds;
1679 ArrayView<const SC> vals;
1680 for (LO j = 0; j < blkSize; j++) {
1681 A.getLocalRowView(row * blkSize + j, inds, vals);
1682 size_type numIndices = inds.size();
1683
1684 if (numIndices == 0) // skip empty dof rows
1685 continue;
1686
1687 // cols: stores all local node ids for current local node id "row"
1688 cols[pos++] = translation[inds[0]];
1689 for (size_type k = 1; k < numIndices; k++) {
1690 LO nodeID = translation[inds[k]];
1691 // Here we try to speed up the process by reducing the size of an array
1692 // to sort. This works if the column nonzeros belonging to the same
1693 // node are stored consequently.
1694 if (nodeID != cols[pos - 1])
1695 cols[pos++] = nodeID;
1696 }
1697 }
1698 cols.resize(pos);
1699 nnz = pos;
1700
1701 // Sort and remove duplicates
1702 std::sort(cols.begin(), cols.end());
1703 pos = 0;
1704 for (size_t j = 1; j < nnz; j++)
1705 if (cols[j] != cols[pos])
1706 cols[++pos] = cols[j];
1707 cols.resize(pos + 1);
1708}
1709
1710template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1711void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1712 typedef typename ArrayView<const LO>::size_type size_type;
1713 typedef Teuchos::ScalarTraits<SC> STS;
1714
1715 // extract striding information
1716 LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1717 if (A.IsView("stridedMaps") == true) {
1718 Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1719 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1720 TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1721 if (strMap->getStridedBlockId() > -1)
1722 blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1723 }
1724
1725 // count nonzero entries in all dof rows associated with node row
1726 size_t nnz = 0, pos = 0;
1727 for (LO j = 0; j < blkSize; j++)
1728 nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1729
1730 if (nnz == 0) {
1731 cols.resize(0);
1732 return;
1733 }
1734
1735 cols.resize(nnz);
1736
1737 // loop over all local dof rows associated with local node "row"
1738 ArrayView<const LO> inds;
1739 ArrayView<const SC> vals;
1740 for (LO j = 0; j < blkSize; j++) {
1741 A.getLocalRowView(row * blkSize + j, inds, vals);
1742 size_type numIndices = inds.size();
1743
1744 if (numIndices == 0) // skip empty dof rows
1745 continue;
1746
1747 // cols: stores all local node ids for current local node id "row"
1748 LO prevNodeID = -1;
1749 for (size_type k = 0; k < numIndices; k++) {
1750 LO dofID = inds[k];
1751 LO nodeID = translation[inds[k]];
1752
1753 // we avoid a square root by using squared values
1754 typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj|
1755 typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1756
1757 // check dropping criterion
1758 if (aij > aiiajj || (row * blkSize + j == dofID)) {
1759 // accept entry in graph
1760
1761 // Here we try to speed up the process by reducing the size of an array
1762 // to sort. This works if the column nonzeros belonging to the same
1763 // node are stored consequently.
1764 if (nodeID != prevNodeID) {
1765 cols[pos++] = nodeID;
1766 prevNodeID = nodeID;
1767 }
1768 }
1769 }
1770 }
1771 cols.resize(pos);
1772 nnz = pos;
1773
1774 // Sort and remove duplicates
1775 std::sort(cols.begin(), cols.end());
1776 pos = 0;
1777 for (size_t j = 1; j < nnz; j++)
1778 if (cols[j] != cols[pos])
1779 cols[++pos] = cols[j];
1780 cols.resize(pos + 1);
1781
1782 return;
1783}
1784
1785template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1786Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level& currentLevel, const RCP<Matrix>& A, bool generate_matrix) const {
1787 typedef Teuchos::ScalarTraits<SC> STS;
1788
1789 const ParameterList& pL = GetParameterList();
1790 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1791 const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1792
1793 RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
1794 RCP<LocalOrdinalVector> ghostedBlockNumber;
1795 GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1796
1797 // Ghost the column block numbers if we need to
1798 RCP<const Import> importer = A->getCrsGraph()->getImporter();
1799 if (!importer.is_null()) {
1800 SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1801 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
1802 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1803 } else {
1804 ghostedBlockNumber = BlockNumber;
1805 }
1806
1807 // Accessors for block numbers
1808 Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1809 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1810
1811 // allocate space for the local graph
1812 typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type rows_mat;
1813 typename LWGraph::row_type::non_const_type rows_graph;
1814 typename LWGraph::entries_type::non_const_type columns;
1815 typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type values;
1816 RCP<CrsMatrixWrap> crs_matrix_wrap;
1817
1818 if (generate_matrix) {
1819 crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1820 rows_mat = typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type("rows_mat", A->getLocalNumRows() + 1);
1821 } else {
1822 rows_graph = typename LWGraph::row_type::non_const_type("rows_graph", A->getLocalNumRows() + 1);
1823 }
1824 columns = typename LWGraph::entries_type::non_const_type("columns", A->getLocalNumEntries());
1825 values = typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type("values", A->getLocalNumEntries());
1826
1827 LO realnnz = 0;
1828 GO numDropped = 0, numTotal = 0;
1829 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1830 LO row_block = row_block_number[row];
1831 size_t nnz = A->getNumEntriesInLocalRow(row);
1832 ArrayView<const LO> indices;
1833 ArrayView<const SC> vals;
1834 A->getLocalRowView(row, indices, vals);
1835
1836 LO rownnz = 0;
1837 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1838 LO col = indices[colID];
1839 LO col_block = col_block_number[col];
1840
1841 if (row_block == col_block) {
1842 if (generate_matrix) values[realnnz] = vals[colID];
1843 columns[realnnz++] = col;
1844 rownnz++;
1845 } else
1846 numDropped++;
1847 }
1848 if (generate_matrix)
1849 rows_mat[row + 1] = realnnz;
1850 else
1851 rows_graph[row + 1] = realnnz;
1852 }
1853
1854 auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1855 if (rowSumTol > 0.)
1856 Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
1857
1858 numTotal = A->getLocalNumEntries();
1859
1860 if (GetVerbLevel() & Statistics1) {
1861 GO numLocalBoundaryNodes = 0;
1862 GO numGlobalBoundaryNodes = 0;
1863 for (size_t i = 0; i < boundaryNodes.size(); ++i)
1864 if (boundaryNodes(i))
1865 numLocalBoundaryNodes++;
1866 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1867 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1868 GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1869
1870 GO numGlobalTotal, numGlobalDropped;
1871 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1872 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1873 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1874 if (numGlobalTotal != 0)
1875 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1876 GetOStream(Statistics1) << std::endl;
1877 }
1878
1879 Set(currentLevel, "Filtering", true);
1880
1881 if (generate_matrix) {
1882 // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1883 // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1884 // here, which is legit, because we never use them anyway.
1885 if constexpr (std::is_same<typename LWGraph::row_type,
1886 typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1887 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1888 } else {
1889 auto rows_mat2 = typename CrsMatrix::local_matrix_type::row_map_type::non_const_type("rows_mat2", rows_mat.extent(0));
1890 Kokkos::deep_copy(rows_mat2, rows_mat);
1891 auto columns2 = typename CrsMatrix::local_graph_type::entries_type::non_const_type("columns2", columns.extent(0));
1892 Kokkos::deep_copy(columns2, columns);
1893 auto values2 = typename CrsMatrix::local_matrix_type::values_type::non_const_type("values2", values.extent(0));
1894 Kokkos::deep_copy(values2, values);
1895 crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1896 }
1897 crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1898 } else {
1899 RCP<LWGraph> graph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1900 graph->SetBoundaryNodeMap(boundaryNodes);
1901 Set(currentLevel, "Graph", graph);
1902 }
1903
1904 Set(currentLevel, "DofsPerNode", 1);
1905 return crs_matrix_wrap;
1906}
1907
1908template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1909void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<LWGraph>& inputGraph, const RCP<LocalOrdinalVector>& ghostedBlockNumber, RCP<LWGraph>& outputGraph, RCP<const Import>& importer) const {
1910 TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1911 const ParameterList& pL = GetParameterList();
1912
1913 const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1914
1915 GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1916 if (localizeColoringGraph)
1917 GetOStream(Statistics1) << ", with localization" << std::endl;
1918 else
1919 GetOStream(Statistics1) << ", without localization" << std::endl;
1920
1921 // Accessors for block numbers
1922 Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1923 Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1924
1925 // allocate space for the local graph
1926 ArrayRCP<size_t> rows_mat;
1927 typename LWGraph::row_type::non_const_type rows_graph("rows_graph", inputGraph->GetNodeNumVertices() + 1);
1928 typename LWGraph::entries_type::non_const_type columns("columns", inputGraph->GetNodeNumEdges());
1929
1930 LO realnnz = 0;
1931 GO numDropped = 0, numTotal = 0;
1932 const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1933 if (localizeColoringGraph) {
1934 for (LO row = 0; row < numRows; ++row) {
1935 LO row_block = row_block_number[row];
1936 auto indices = inputGraph->getNeighborVertices(row);
1937
1938 LO rownnz = 0;
1939 for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1940 LO col = indices(colID);
1941 LO col_block = col_block_number[col];
1942
1943 if ((row_block == col_block) && (col < numRows)) {
1944 columns(realnnz++) = col;
1945 rownnz++;
1946 } else
1947 numDropped++;
1948 }
1949 rows_graph(row + 1) = realnnz;
1950 }
1951 } else {
1952 // ghosting of boundary node map
1953 auto boundaryNodes = inputGraph->GetBoundaryNodeMap();
1954 auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(inputGraph->GetDomainMap());
1955 for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++)
1956 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1957 // Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1958 auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node>::Build(inputGraph->GetImportMap());
1959 boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
1960 auto boundaryColumn = boundaryColumnVector->getData(0);
1961
1962 for (LO row = 0; row < numRows; ++row) {
1963 LO row_block = row_block_number[row];
1964 auto indices = inputGraph->getNeighborVertices(row);
1965
1966 LO rownnz = 0;
1967 for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1968 LO col = indices(colID);
1969 LO col_block = col_block_number[col];
1970
1971 if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1972 columns(realnnz++) = col;
1973 rownnz++;
1974 } else
1975 numDropped++;
1976 }
1977 rows_graph(row + 1) = realnnz;
1978 }
1979 }
1980
1981 numTotal = inputGraph->GetNodeNumEdges();
1982
1983 if (GetVerbLevel() & Statistics1) {
1984 RCP<const Teuchos::Comm<int>> comm = inputGraph->GetDomainMap()->getComm();
1985 GO numGlobalTotal, numGlobalDropped;
1986 MueLu_sumAll(comm, numTotal, numGlobalTotal);
1987 MueLu_sumAll(comm, numDropped, numGlobalDropped);
1988 GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1989 if (numGlobalTotal != 0)
1990 GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1991 GetOStream(Statistics1) << std::endl;
1992 }
1993
1994 if (localizeColoringGraph) {
1995 outputGraph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1996 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1997 } else {
1998 TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1999#ifdef HAVE_XPETRA_TPETRA
2000 auto outputGraph2 = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
2001
2002 auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
2003 auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal, GlobalOrdinal, Node>(tpGraph));
2004 auto tpGraphSym = sym->symmetrize();
2005 auto lclGraphSym = tpGraphSym->getLocalGraphHost();
2006 auto colIndsSym = lclGraphSym.entries;
2007
2008 auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
2009 typename LWGraph::row_type::non_const_type rows_graphSym("rows_graphSym", rowsSym.size());
2010 for (size_t row = 0; row < rowsSym.size(); row++)
2011 rows_graphSym(row) = rowsSym(row);
2012 outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
2013 outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
2014#endif
2015 }
2016}
2017
2018} // namespace MueLu
2019
2020#endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
void DeclareInput(Level &currentLevel) const
Input.
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
RCP< PreDropFunctionBaseClass > predrop_
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
virtual const Teuchos::ParameterList & GetParameterList() const
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &A)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar > > &v, DefaultLocalOrdinal i0, DefaultLocalOrdinal i1)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
VerbLevel GetVerbLevel() const
Get the verbosity level.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line).
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Warnings1
Additional warnings.
@ Parameters0
Print class parameters.
DropTol & operator=(DropTol const &)=default
DropTol(DropTol &&)=default
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
DropTol & operator=(DropTol &&)=default
DropTol(DropTol const &)=default