MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_FilteredAFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_FILTEREDAFACTORY_DEF_HPP
11#define MUELU_FILTEREDAFACTORY_DEF_HPP
12
13#include <Xpetra_Matrix.hpp>
14#include <Xpetra_MatrixFactory.hpp>
15#include <Xpetra_IO.hpp>
16
18
19#include "MueLu_Level.hpp"
20#include "MueLu_MasterList.hpp"
21#include "MueLu_Monitor.hpp"
22#include "MueLu_Aggregates.hpp"
23#include "MueLu_AmalgamationInfo.hpp"
24#include "MueLu_Utilities.hpp"
25
26// Variable to enable lots of debug output
27#define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING 0
28
29namespace MueLu {
30
// Sort a sequence container ascending and remove duplicate entries in place
// (the classic sort + unique + erase idiom).
//
// @param[in,out] array  Container providing begin()/end()/erase() with
//                       random-access iterators (e.g. std::vector); on return
//                       it is sorted and duplicate-free.
//
// NOTE: the original listing had Doxygen line-number residue fused into the
// code ("31template", ...), which made the block ill-formed; this is the
// cleaned-up equivalent.
template <class T>
void sort_and_unique(T& array) {
  std::sort(array.begin(), array.end());
  // std::unique compacts the unique prefix; erase the leftover tail.
  array.erase(std::unique(array.begin(), array.end()), array.end());
}
37
// Builds the list of parameters this factory accepts -- the "filtered matrix: *"
// options (defaults pulled from the MasterList) plus the generating factories it
// depends on ("A", "Graph", "Filtering", and, for the root-stencil option,
// "Aggregates" / "UnAmalgamationInfo").
//
// NOTE(review): the qualified definition line (listing line 39, presumably
// "RCP<const ParameterList> FilteredAFactory<...>::GetValidParameterList() const {")
// was dropped by the documentation extraction -- confirm against the repository.
// The numeric prefixes on each line below are extraction residue, not code.
38template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
40 RCP<ParameterList> validParamList = rcp(new ParameterList());
41
// Helper macro: register an entry whose default/validator comes from the MasterList.
42#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
43 SET_VALID_ENTRY("filtered matrix: use lumping");
44 SET_VALID_ENTRY("filtered matrix: reuse graph");
45 SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
46 SET_VALID_ENTRY("filtered matrix: use root stencil");
47 SET_VALID_ENTRY("filtered matrix: use spread lumping");
48 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom growth factor");
49 SET_VALID_ENTRY("filtered matrix: spread lumping diag dom cap");
50 SET_VALID_ENTRY("filtered matrix: Dirichlet threshold");
51#undef SET_VALID_ENTRY
52
// Upstream factories this one consumes.
53 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used for filtering");
54 validParamList->set<RCP<const FactoryBase> >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph");
55 validParamList->set<RCP<const FactoryBase> >("Filtering", Teuchos::null, "Generating factory for filtering boolean");
56
57 // Only need these for the "use root stencil" option
58 validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
59 validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
60 return validParamList;
61}
62
// Declares the level data this factory needs: "A", "Filtering", and "Graph"
// always; "Aggregates" and "UnAmalgamationInfo" only when the
// "filtered matrix: use root stencil" option is present and enabled.
//
// NOTE(review): the qualified definition line (listing line 64, presumably
// "void FilteredAFactory<...>::DeclareInput(Level& currentLevel) const {")
// was dropped by the documentation extraction -- confirm against the repository.
63template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
65 Input(currentLevel, "A");
66 Input(currentLevel, "Filtering");
67 Input(currentLevel, "Graph");
68 const ParameterList& pL = GetParameterList();
// Root-stencil mode additionally needs aggregate information.
69 if (pL.isParameter("filtered matrix: use root stencil") && pL.get<bool>("filtered matrix: use root stencil") == true) {
70 Input(currentLevel, "Aggregates");
71 Input(currentLevel, "UnAmalgamationInfo");
72 }
73}
74
// Entry point: builds the filtered matrix for this level. If no filtering was
// requested it republishes A unchanged; otherwise it dispatches to one of the
// three construction paths (root stencil / reuse graph / new graph), optionally
// applies the experimental "spread lumping", and publishes the result as "A".
//
// NOTE(review): the qualified definition line (listing line 76, presumably
// "void FilteredAFactory<...>::Build(Level& currentLevel) const {") was dropped
// by the documentation extraction -- confirm against the repository.
75template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
77 FactoryMonitor m(*this, "Matrix filtering", currentLevel);
78
79 RCP<Matrix> A = Get<RCP<Matrix> >(currentLevel, "A");
// Nothing was dropped upstream: pass A through untouched.
80 if (Get<bool>(currentLevel, "Filtering") == false) {
81 GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
82 Set(currentLevel, "A", A);
83 return;
84 }
85
86 const ParameterList& pL = GetParameterList();
87 bool lumping = pL.get<bool>("filtered matrix: use lumping");
88 if (lumping)
89 GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;
90
// Spread lumping is a refinement of lumping; it cannot be used alone.
91 bool use_spread_lumping = pL.get<bool>("filtered matrix: use spread lumping");
92 if (use_spread_lumping && (!lumping))
93 throw std::runtime_error("Must also request 'filtered matrix: use lumping' in order to use spread lumping");
94
95 if (use_spread_lumping) {
96 GetOStream(Runtime0) << "using spread lumping " << std::endl;
97 }
98
// Diagonal-dominance controls, only read when spread lumping is active.
99 double DdomAllowGrowthRate = 1.1;
100 double DdomCap = 2.0;
101 if (use_spread_lumping) {
102 DdomAllowGrowthRate = pL.get<double>("filtered matrix: spread lumping diag dom growth factor");
103 DdomCap = pL.get<double>("filtered matrix: spread lumping diag dom cap");
104 }
// Root stencil implies lumping (guarded here by the && with lumping).
105 bool use_root_stencil = lumping && pL.get<bool>("filtered matrix: use root stencil");
106 if (use_root_stencil)
107 GetOStream(Runtime0) << "Using root stencil for dropping" << std::endl;
108 double dirichlet_threshold = pL.get<double>("filtered matrix: Dirichlet threshold");
109 if (dirichlet_threshold >= 0.0)
110 GetOStream(Runtime0) << "Filtering Dirichlet threshold of " << dirichlet_threshold << std::endl;
111
112 if (use_root_stencil || pL.get<bool>("filtered matrix: reuse graph"))
113 GetOStream(Runtime0) << "Reusing graph" << std::endl;
114 else
115 GetOStream(Runtime0) << "Generating new graph" << std::endl;
116
117 RCP<LWGraph> G = Get<RCP<LWGraph> >(currentLevel, "Graph");
// NOTE(review): listing line 118 is missing here -- presumably a debug guard such as
// "if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) {" opening the graph.dat dump below
// (which would match the otherwise-unbalanced "}" at listing line 129). Confirm upstream.
119 FILE* f = fopen("graph.dat", "w");
120 size_t numGRows = G->GetNodeNumVertices();
121 for (size_t i = 0; i < numGRows; i++) {
122 // Set up filtering array
123 auto indsG = G->getNeighborVertices(i);
124 for (size_t j = 0; j < (size_t)indsG.length; j++) {
125 fprintf(f, "%d %d 1.0\n", (int)i, (int)indsG(j));
126 }
127 }
128 fclose(f);
129 }
130
131 RCP<ParameterList> fillCompleteParams(new ParameterList);
132 fillCompleteParams->set("No Nonlocal Changes", true);
133
// Three construction paths; all leave filteredA fill-complete.
134 RCP<Matrix> filteredA;
135 if (use_root_stencil) {
// Reuse A's graph; fillComplete+resumeFill so values can be overwritten in place.
136 filteredA = MatrixFactory::Build(A->getCrsGraph());
137 filteredA->fillComplete(fillCompleteParams);
138 filteredA->resumeFill();
139 BuildNewUsingRootStencil(*A, *G, dirichlet_threshold, currentLevel, *filteredA, use_spread_lumping, DdomAllowGrowthRate, DdomCap);
140 filteredA->fillComplete(fillCompleteParams);
141
142 } else if (pL.get<bool>("filtered matrix: reuse graph")) {
143 filteredA = MatrixFactory::Build(A->getCrsGraph());
144 filteredA->resumeFill();
145 BuildReuse(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA);
146 // only lump inside BuildReuse if lumping is true and use_spread_lumping is false
147 // note: they use_spread_lumping cannot be true if lumping is false
148
149 if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap);
150 filteredA->fillComplete(fillCompleteParams);
151
152 } else {
// Fresh graph: allocate by row/col map and insert surviving entries.
153 filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
154 BuildNew(*A, *G, (lumping != use_spread_lumping), dirichlet_threshold, *filteredA);
155 // only lump inside BuildNew if lumping is true and use_spread_lumping is false
156 // note: they use_spread_lumping cannot be true if lumping is false
157 if (use_spread_lumping) ExperimentalLumping(*A, *filteredA, DdomAllowGrowthRate, DdomCap);
158 filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
159 }
160
// NOTE(review): listing line 161 is missing here -- presumably another
// "if (MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING) {" guard for the debug matrix
// dumps below (matching the "}" at listing line 171). Confirm upstream.
162 Xpetra::IO<SC, LO, GO, NO>::Write("filteredA.dat", *filteredA);
163
164 // original filtered A and actual A
165 Xpetra::IO<SC, LO, GO, NO>::Write("A.dat", *A);
166 RCP<Matrix> origFilteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getLocalMaxNumRowEntries());
167 BuildNew(*A, *G, lumping, dirichlet_threshold, *origFilteredA);
168 if (use_spread_lumping) ExperimentalLumping(*A, *origFilteredA, DdomAllowGrowthRate, DdomCap);
169 origFilteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
170 Xpetra::IO<SC, LO, GO, NO>::Write("origFilteredA.dat", *origFilteredA);
171 }
172
// Preserve block structure on the filtered operator.
173 filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
174
175 if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
176 // Reuse max eigenvalue from A
177 // It is unclear what eigenvalue is the best for the smoothing, but we already may have
178 // the D^{-1}A estimate in A, may as well use it.
179 // NOTE: ML does that too
180 filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
181 }
182
183 Set(currentLevel, "A", filteredA);
184}
185
186// Epetra's API allows direct access to row array.
187// Tpetra's API does not, providing only ArrayView<const .>
188// But in most situations we are currently interested in, it is safe to assume
189// that the view is to the actual data. So this macro directs the code to do
190// const_cast, and modify the entries directly. This allows us to avoid
191// replaceLocalValues() call which is quite expensive due to all the searches.
192//#define ASSUME_DIRECT_ACCESS_TO_ROW // See github issue 10883#issuecomment-1256676340
193
194// Both Epetra and Tpetra matrix-matrix multiply use the following trick:
195// if an entry of the left matrix is zero, it does not compute or store the
196// zero value.
197//
198// This trick allows us to bypass constructing a new matrix. Instead, we
199// make a deep copy of the original one, and fill it in with zeros, which
200// are ignored during the prolongator smoothing.
// Fills a filtered matrix that SHARES A's CRS graph: for every row, entries
// whose (amalgamated) column survives in the filtered graph G keep A's value,
// dropped entries are set to zero (and, with lumping, added to the diagonal).
// A diagonal that falls at/below dirichletThresh after lumping is reset to one.
//
// NOTE(review): the qualifier line (listing line 202, presumably
// "void FilteredAFactory<...>::") was dropped by the documentation extraction;
// the numeric prefixes below are extraction residue, not code.
202template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
203 BuildReuse(const Matrix& A, const LWGraph& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const {
204 using TST = typename Teuchos::ScalarTraits<SC>;
205 SC zero = TST::zero();
206
207 size_t blkSize = A.GetFixedBlockSize();
208
209 ArrayView<const LO> inds;
210 ArrayView<const SC> valsA;
211#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
212 ArrayView<SC> vals;
213#else
214 Array<SC> vals;
215#endif
216
// Per-column keep/drop flags in DOF (not node) numbering, reused across rows;
// sized to cover both the graph's import map and A's column map.
217 Array<char> filter(std::max(blkSize * G.GetImportMap()->getLocalNumElements(),
218 A.getColMap()->getLocalNumElements()),
219 0);
220
221 size_t numGRows = G.GetNodeNumVertices();
222 for (size_t i = 0; i < numGRows; i++) {
223 // Set up filtering array
224 auto indsG = G.getNeighborVertices(i);
225 for (size_t j = 0; j < as<size_t>(indsG.length); j++)
226 for (size_t k = 0; k < blkSize; k++)
227 filter[indsG(j) * blkSize + k] = 1;
228
// Process the blkSize DOF rows belonging to graph node i.
229 for (size_t k = 0; k < blkSize; k++) {
230 LO row = i * blkSize + k;
231
232 A.getLocalRowView(row, inds, valsA);
233
234 size_t nnz = inds.size();
235 if (nnz == 0)
236 continue;
237
238#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
239 // Transform ArrayView<const SC> into ArrayView<SC>
240 ArrayView<const SC> vals1;
241 filteredA.getLocalRowView(row, inds, vals1);
242 vals = ArrayView<SC>(const_cast<SC*>(vals1.getRawPtr()), nnz);
243
244 memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz * sizeof(SC));
245#else
246 vals = Array<SC>(valsA);
247#endif
248
249 SC ZERO = Teuchos::ScalarTraits<SC>::zero();
250 // SC ONE = Teuchos::ScalarTraits<SC>::one();
251 SC A_rowsum = ZERO, F_rowsum = ZERO;
252 for (LO l = 0; l < (LO)inds.size(); l++)
253 A_rowsum += valsA[l];
254
// No lumping: dropped entries simply become zero.
255 if (lumping == false) {
256 for (size_t j = 0; j < nnz; j++)
257 if (!filter[inds[j]])
258 vals[j] = zero;
259
260 } else {
// Lumping: accumulate dropped values in diagExtra and add them to the diagonal.
261 LO diagIndex = -1;
262 SC diagExtra = zero;
263
264 for (size_t j = 0; j < nnz; j++) {
265 if (filter[inds[j]]) {
266 if (inds[j] == row) {
267 // Remember diagonal position
268 diagIndex = j;
269 }
270 continue;
271 }
272
273 diagExtra += vals[j];
274
275 vals[j] = zero;
276 }
277
278 // Lump dropped entries
279 // NOTE
280 // * Does it make sense to lump for elasticity?
281 // * Is it different for diffusion and elasticity?
282 // SC diagA = ZERO;
283 if (diagIndex != -1) {
284 // diagA = vals[diagIndex];
285 vals[diagIndex] += diagExtra;
// Dirichlet fix: a too-small lumped diagonal is replaced with one.
286 if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) {
287 // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA);
288 for (LO l = 0; l < (LO)nnz; l++)
289 F_rowsum += vals[l];
290 // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum);
291 vals[diagIndex] = TST::one();
292 }
293 }
294 }
295
296#ifndef ASSUME_DIRECT_ACCESS_TO_ROW
297 // Because we used a column map in the construction of the matrix
298 // we can just use insertLocalValues here instead of insertGlobalValues
// NOTE(review): the comment above is stale -- since filteredA shares A's graph the
// entries already exist, and this path overwrites them with replaceLocalValues.
299 filteredA.replaceLocalValues(row, inds, vals);
300#endif
301 }
302
303 // Reset filtering array
304 for (size_t j = 0; j < as<size_t>(indsG.length); j++)
305 for (size_t k = 0; k < blkSize; k++)
306 filter[indsG(j) * blkSize + k] = 0;
307 }
308}
309
// Fills a filtered matrix with a NEW sparsity pattern: for every row of A only
// the entries whose (amalgamated) column survives in the filtered graph G are
// inserted; with lumping, dropped values are added to the diagonal, and a
// diagonal at/below dirichletThresh is reset to one. Rows are populated with
// insertLocalValues, so filteredA must have been built with A's column map.
//
// NOTE(review): the qualifier line (listing line 311, presumably
// "void FilteredAFactory<...>::") was dropped by the documentation extraction;
// the numeric prefixes below are extraction residue, not code.
310template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
312 BuildNew(const Matrix& A, const LWGraph& G, const bool lumping, double dirichletThresh, Matrix& filteredA) const {
313 using TST = typename Teuchos::ScalarTraits<SC>;
314 SC zero = Teuchos::ScalarTraits<SC>::zero();
315
316 size_t blkSize = A.GetFixedBlockSize();
317
318 ArrayView<const LO> indsA;
319 ArrayView<const SC> valsA;
// Scratch buffers for the compacted (kept-only) row.
320 Array<LO> inds;
321 Array<SC> vals;
322
// Per-column keep/drop flags in DOF numbering, reused across rows.
323 Array<char> filter(blkSize * G.GetImportMap()->getLocalNumElements(), 0);
324
325 size_t numGRows = G.GetNodeNumVertices();
326 for (size_t i = 0; i < numGRows; i++) {
327 // Set up filtering array
328 auto indsG = G.getNeighborVertices(i);
329 for (size_t j = 0; j < as<size_t>(indsG.length); j++)
330 for (size_t k = 0; k < blkSize; k++)
331 filter[indsG(j) * blkSize + k] = 1;
332
// Process the blkSize DOF rows belonging to graph node i.
333 for (size_t k = 0; k < blkSize; k++) {
334 LO row = i * blkSize + k;
335
336 A.getLocalRowView(row, indsA, valsA);
337
338 size_t nnz = indsA.size();
339 if (nnz == 0)
340 continue;
341
342 inds.resize(indsA.size());
343 vals.resize(valsA.size());
344
// Compact kept entries to the front; numInds is the surviving count.
345 size_t numInds = 0;
346 if (lumping == false) {
347 for (size_t j = 0; j < nnz; j++)
348 if (filter[indsA[j]]) {
349 inds[numInds] = indsA[j];
350 vals[numInds] = valsA[j];
351 numInds++;
352 }
353
354 } else {
355 LO diagIndex = -1;
356 SC diagExtra = zero;
357
358 for (size_t j = 0; j < nnz; j++) {
359 if (filter[indsA[j]]) {
360 inds[numInds] = indsA[j];
361 vals[numInds] = valsA[j];
362
363 // Remember diagonal position
364 if (inds[numInds] == row)
365 diagIndex = numInds;
366
367 numInds++;
368
369 } else {
// Dropped entry: credit it to the diagonal instead.
370 diagExtra += valsA[j];
371 }
372 }
373
374 // Lump dropped entries
375 // NOTE
376 // * Does it make sense to lump for elasticity?
377 // * Is it different for diffusion and elasticity?
378 if (diagIndex != -1) {
379 vals[diagIndex] += diagExtra;
// Dirichlet fix: a too-small lumped diagonal is replaced with one.
380 if (dirichletThresh >= 0.0 && TST::real(vals[diagIndex]) <= dirichletThresh) {
381 // SC A_rowsum = ZERO, F_rowsum = ZERO;
382 // printf("WARNING: row %d diag(Afiltered) = %8.2e diag(A)=%8.2e\n",row,vals[diagIndex],diagA);
383 // for(LO l = 0; l < (LO)nnz; l++)
384 // F_rowsum += vals[l];
385 // printf(" : A rowsum = %8.2e F rowsum = %8.2e\n",A_rowsum,F_rowsum);
386 vals[diagIndex] = TST::one();
387 }
388 }
389 }
390 inds.resize(numInds);
391 vals.resize(numInds);
392
393 // Because we used a column map in the construction of the matrix
394 // we can just use insertLocalValues here instead of insertGlobalValues
395 filteredA.insertLocalValues(row, inds, vals);
396 }
397
398 // Reset filtering array
399 for (size_t j = 0; j < as<size_t>(indsG.length); j++)
400 for (size_t k = 0; k < blkSize; k++)
401 filter[indsG(j) * blkSize + k] = 0;
402 }
403}
404
// Root-stencil filtering: for each aggregate, keeps only connections to
// aggregates that neighbor the aggregate's root node in the filtered graph G,
// drops (and symmetrizes the drop of) connections to "really bad" neighbor
// aggregates, lumps dropped values to the diagonal, and finally repairs
// zero/near-Dirichlet diagonals. filteredA must share A's CRS graph; values
// are edited in place through the raw CRS arrays and then pushed back with
// replaceLocalValues.
//
// NOTE(review): the qualifier line (listing line 406, presumably
// "void FilteredAFactory<...>::") was dropped by the documentation extraction;
// the numeric prefixes below are extraction residue, not code.
405template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
407 BuildNewUsingRootStencil(const Matrix& A, const LWGraph& G, double dirichletThresh, Level& currentLevel, Matrix& filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const {
408 using TST = typename Teuchos::ScalarTraits<SC>;
409 using Teuchos::arcp_const_cast;
410 SC ZERO = Teuchos::ScalarTraits<SC>::zero();
411 SC ONE = Teuchos::ScalarTraits<SC>::one();
412 LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
413
414 // In the badAggNeighbors loop, if the entry has any number besides NAN, I add it to the diagExtra and then zero the guy.
415 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(currentLevel, "Aggregates");
416 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
417 LO numAggs = aggregates->GetNumAggregates();
418
419 size_t numNodes = G.GetNodeNumVertices();
420 size_t blkSize = A.GetFixedBlockSize();
421 size_t numRows = A.getMap()->getLocalNumElements();
422 ArrayView<const LO> indsA;
423 ArrayView<const SC> valsA;
424 ArrayRCP<const size_t> rowptr;
425 ArrayRCP<const LO> inds;
426 ArrayRCP<const SC> vals_const;
427 ArrayRCP<SC> vals;
428
429 // We're going to grab the vals array from filteredA and then blitz it with NAN as a placeholder for "entries that have
430 // not yey been touched." If I see an entry in the primary loop that has a zero, then I assume it has been nuked by
431 // it's symmetric pair, so I add it to the diagonal. If it has a NAN, process as normal.
// Direct (const-cast) access to filteredA's value array for in-place editing.
432 RCP<CrsMatrix> filteredAcrs = dynamic_cast<const CrsMatrixWrap*>(&filteredA)->getCrsMatrix();
433 filteredAcrs->getAllValues(rowptr, inds, vals_const);
434 vals = arcp_const_cast<SC>(vals_const);
435 Array<bool> vals_dropped_indicator(vals.size(), false);
436
437 // Check map nesting
438 RCP<const Map> rowMap = A.getRowMap();
439 RCP<const Map> colMap = A.getColMap();
440 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
441 TEUCHOS_TEST_FOR_EXCEPTION(!goodMap, Exceptions::RuntimeError, "FilteredAFactory: Maps are not nested");
442
443 // Since we're going to symmetrize this
444 Array<LO> diagIndex(numRows, INVALID);
445 Array<SC> diagExtra(numRows, ZERO);
446
447 // Lists of nodes in each aggregate
448 struct {
449 // GH: For now, copy everything to host until we properly set this factory to run device code
450 // Instead, we'll copy data into HostMirrors and run the algorithms on host, saving optimization for later.
451 typename Aggregates::LO_view ptr, nodes, unaggregated;
452 typename Aggregates::LO_view::HostMirror ptr_h, nodes_h, unaggregated_h;
453 } nodesInAgg;
454 aggregates->ComputeNodesInAggregate(nodesInAgg.ptr, nodesInAgg.nodes, nodesInAgg.unaggregated);
455 nodesInAgg.ptr_h = Kokkos::create_mirror_view(nodesInAgg.ptr);
456 nodesInAgg.nodes_h = Kokkos::create_mirror_view(nodesInAgg.nodes);
457 nodesInAgg.unaggregated_h = Kokkos::create_mirror_view(nodesInAgg.unaggregated);
458 Kokkos::deep_copy(nodesInAgg.ptr_h, nodesInAgg.ptr);
459 Kokkos::deep_copy(nodesInAgg.nodes_h, nodesInAgg.nodes);
460 Kokkos::deep_copy(nodesInAgg.unaggregated_h, nodesInAgg.unaggregated);
461 Teuchos::ArrayRCP<const LO> vertex2AggId = aggregates->GetVertex2AggId()->getData(0); // GH: this is needed on device, grab the pointer after we call ComputeNodesInAggregate
462
// Node-level keep/drop flags (one per graph column), reused across rows.
463 LO graphNumCols = G.GetImportMap()->getLocalNumElements();
464 Array<bool> filter(graphNumCols, false);
465
466 // Loop over the unaggregated nodes. Blitz those rows. We don't want to smooth singletons.
467 for (LO i = 0; i < (LO)nodesInAgg.unaggregated_h.extent(0); i++) {
468 for (LO m = 0; m < (LO)blkSize; m++) {
469 LO row = amalgInfo->ComputeLocalDOF(nodesInAgg.unaggregated_h(i), m);
470 if (row >= (LO)numRows) continue;
471 size_t index_start = rowptr[row];
472 A.getLocalRowView(row, indsA, valsA);
// Identity row: one on the diagonal, zero elsewhere.
473 for (LO k = 0; k < (LO)indsA.size(); k++) {
474 if (row == indsA[k]) {
475 vals[index_start + k] = ONE;
476 diagIndex[row] = k;
477 } else
478 vals[index_start + k] = ZERO;
479 }
480 }
481 } // end nodesInAgg.unaggregated.extent(0);
482
483 std::vector<LO> badCount(numAggs, 0);
484
485 // Find the biggest aggregate size in *nodes*
486 LO maxAggSize = 0;
487 for (LO i = 0; i < numAggs; i++)
488 maxAggSize = std::max(maxAggSize, nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i));
489
490 // Loop over all the aggregates
491 std::vector<LO> goodAggNeighbors(G.getLocalMaxNumRowEntries());
492 std::vector<LO> badAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes));
493
// Drop statistics (reduced over ranks and reported at the end).
494 size_t numNewDrops = 0;
495 size_t numOldDrops = 0;
496 size_t numFixedDiags = 0;
497 size_t numSymDrops = 0;
498
499 for (LO i = 0; i < numAggs; i++) {
500 LO numNodesInAggregate = nodesInAgg.ptr_h(i + 1) - nodesInAgg.ptr_h(i);
501 if (numNodesInAggregate == 0) continue;
502
503 // Find the root *node*
504 LO root_node = INVALID;
505 for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
506 if (aggregates->IsRoot(nodesInAgg.nodes_h(k))) {
507 root_node = nodesInAgg.nodes_h(k);
508 break;
509 }
510 }
511
512 TEUCHOS_TEST_FOR_EXCEPTION(root_node == INVALID,
513 Exceptions::RuntimeError, "MueLu::FilteredAFactory::BuildNewUsingRootStencil: Cannot find root node");
514
515 // Find the list of "good" node neighbors (aka nodes which border the root node in the Graph G)
516 auto goodNodeNeighbors = G.getNeighborVertices(root_node);
517
518 // Now find the list of "good" aggregate neighbors (aka the aggregates neighbor the root node in the Graph G)
519 goodAggNeighbors.resize(0);
520 for (LO k = 0; k < (LO)goodNodeNeighbors.length; k++) {
521 goodAggNeighbors.push_back(vertex2AggId[goodNodeNeighbors(k)]);
522 }
523 sort_and_unique(goodAggNeighbors);
524
525 // Now we get the list of "bad" aggregate neighbors (aka aggregates which border the
526 // root node in the original matrix A, which are not goodNodeNeighbors). Since we
527 // don't have an amalgamated version of the original matrix, we use the matrix directly
528 badAggNeighbors.resize(0);
529 for (LO j = 0; j < (LO)blkSize; j++) {
530 LO row = amalgInfo->ComputeLocalDOF(root_node, j);
531 if (row >= (LO)numRows) continue;
532 A.getLocalRowView(row, indsA, valsA);
533 for (LO k = 0; k < (LO)indsA.size(); k++) {
534 if ((indsA[k] < (LO)numRows) && (TST::magnitude(valsA[k]) != TST::magnitude(ZERO))) {
535 LO node = amalgInfo->ComputeLocalNode(indsA[k]);
536 LO agg = vertex2AggId[node];
537 if (!std::binary_search(goodAggNeighbors.begin(), goodAggNeighbors.end(), agg))
538 badAggNeighbors.push_back(agg);
539 }
540 }
541 }
542 sort_and_unique(badAggNeighbors);
543
544 // Go through the filtered graph and count the number of connections to the badAggNeighbors
545 // if there are 2 or more of these connections, remove them from the bad list.
546
// NOTE(review): getNeighborVertices() is called with the position index k here
// (and in the reset loop below), not with the node id nodesInAgg.nodes_h(k) that
// the analogous loop at listing line 593 uses -- verify upstream that this is intentional.
547 for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
548 auto nodeNeighbors = G.getNeighborVertices(k);
549 for (LO kk = 0; kk < nodeNeighbors.length; kk++) {
550 if ((vertex2AggId[nodeNeighbors(kk)] >= 0) && (vertex2AggId[nodeNeighbors(kk)] < numAggs))
551 (badCount[vertex2AggId[nodeNeighbors(kk)]])++;
552 }
553 }
554 std::vector<LO> reallyBadAggNeighbors(std::min(G.getLocalMaxNumRowEntries() * maxAggSize, numNodes));
555 reallyBadAggNeighbors.resize(0);
556 for (LO k = 0; k < (LO)badAggNeighbors.size(); k++) {
557 if (badCount[badAggNeighbors[k]] <= 1) reallyBadAggNeighbors.push_back(badAggNeighbors[k]);
558 }
// Reset badCount for the next aggregate (same traversal as the counting loop).
559 for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
560 auto nodeNeighbors = G.getNeighborVertices(k);
561 for (LO kk = 0; kk < nodeNeighbors.length; kk++) {
562 if ((vertex2AggId[nodeNeighbors(kk)] >= 0) && (vertex2AggId[nodeNeighbors(kk)] < numAggs))
563 badCount[vertex2AggId[nodeNeighbors(kk)]] = 0;
564 }
565 }
566
567 // For each of the reallyBadAggNeighbors, we go and blitz their connections to dofs in this aggregate.
568 // We remove the INVALID marker when we do this so we don't wind up doubling this up later
569 for (LO b = 0; b < (LO)reallyBadAggNeighbors.size(); b++) {
570 LO bad_agg = reallyBadAggNeighbors[b];
571 for (LO k = nodesInAgg.ptr_h(bad_agg); k < nodesInAgg.ptr_h(bad_agg + 1); k++) {
572 LO bad_node = nodesInAgg.nodes_h(k);
573 for (LO j = 0; j < (LO)blkSize; j++) {
574 LO bad_row = amalgInfo->ComputeLocalDOF(bad_node, j);
575 if (bad_row >= (LO)numRows) continue;
576 size_t index_start = rowptr[bad_row];
577 A.getLocalRowView(bad_row, indsA, valsA);
578 for (LO l = 0; l < (LO)indsA.size(); l++) {
579 if (indsA[l] < (LO)numRows && vertex2AggId[amalgInfo->ComputeLocalNode(indsA[l])] == i && vals_dropped_indicator[index_start + l] == false) {
580 vals_dropped_indicator[index_start + l] = true;
581 vals[index_start + l] = ZERO;
582 diagExtra[bad_row] += valsA[l];
583 numSymDrops++;
584 }
585 }
586 }
587 }
588 }
589
590 // Now lets fill the rows in this aggregate and figure out the diagonal lumping
591 // We loop over each node in the aggregate and then over the neighbors of that node
592
593 for (LO k = nodesInAgg.ptr_h(i); k < nodesInAgg.ptr_h(i + 1); k++) {
594 LO row_node = nodesInAgg.nodes_h(k);
595
596 // Set up filtering array
597 auto indsG = G.getNeighborVertices(row_node);
598 for (size_t j = 0; j < as<size_t>(indsG.length); j++)
599 filter[indsG(j)] = true;
600
601 for (LO m = 0; m < (LO)blkSize; m++) {
602 LO row = amalgInfo->ComputeLocalDOF(row_node, m);
603 if (row >= (LO)numRows) continue;
604 size_t index_start = rowptr[row];
605 A.getLocalRowView(row, indsA, valsA);
606
607 for (LO l = 0; l < (LO)indsA.size(); l++) {
608 int col_node = amalgInfo->ComputeLocalNode(indsA[l]);
609 bool is_good = filter[col_node];
// The diagonal is always kept.
610 if (indsA[l] == row) {
611 diagIndex[row] = l;
612 vals[index_start + l] = valsA[l];
613 continue;
614 }
615
616 // If we've already dropped this guy (from symmetry above), then continue onward
617 if (vals_dropped_indicator[index_start + l] == true) {
618 if (is_good)
619 numOldDrops++;
620 else
621 numNewDrops++;
622 continue;
623 }
624
625 // FIXME: I'm assuming vertex2AggId is only length of the rowmap, so
626 // we won'd do secondary dropping on off-processor neighbors
627 if (is_good && indsA[l] < (LO)numRows) {
628 int agg = vertex2AggId[col_node];
629 if (std::binary_search(reallyBadAggNeighbors.begin(), reallyBadAggNeighbors.end(), agg))
630 is_good = false;
631 }
632
633 if (is_good) {
634 vals[index_start + l] = valsA[l];
635 } else {
636 if (!filter[col_node])
637 numOldDrops++;
638 else
639 numNewDrops++;
// Dropped entry: credit it to the diagonal, zero it, and mark it dropped.
640 diagExtra[row] += valsA[l];
641 vals[index_start + l] = ZERO;
642 vals_dropped_indicator[index_start + l] = true;
643 }
644 } // end for l "indsA.size()" loop
645
646 } // end m "blkSize" loop
647
648 // Clear filtering array
649 for (size_t j = 0; j < as<size_t>(indsG.length); j++)
650 filter[indsG(j)] = false;
651
652 } // end k loop over number of nodes in this agg
653 } // end i loop over numAggs
654
// Without spread lumping, fold diagExtra into the diagonal here; with it,
// ExperimentalLumping (called below) handles the redistribution instead.
655 if (!use_spread_lumping) {
656 // Now do the diagonal modifications in one, final pass
657 for (LO row = 0; row < (LO)numRows; row++) {
658 if (diagIndex[row] != INVALID) {
659 size_t index_start = rowptr[row];
660 size_t diagIndexInMatrix = index_start + diagIndex[row];
661 // printf("diag_vals pre update = %8.2e\n", vals[diagIndex] );
662 vals[diagIndexInMatrix] += diagExtra[row];
663 SC A_rowsum = ZERO, A_absrowsum = ZERO, F_rowsum = ZERO;
664
665 if ((dirichletThresh >= 0.0 && TST::real(vals[diagIndexInMatrix]) <= dirichletThresh) || TST::real(vals[diagIndexInMatrix]) == ZERO) {
// NOTE(review): listing lines 666 and 678 are missing from this region -- likely
// debug lines referencing MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING dropped by the
// extractor; the braces between here and listing line 695 do not balance as shown.
// Confirm the exact structure against the repository before editing.
667 A.getLocalRowView(row, indsA, valsA);
668 // SC diagA = valsA[diagIndex[row]];
669 // printf("WARNING: row %d (diagIndex=%d) diag(Afiltered) = %8.2e diag(A)=%8.2e numInds = %d\n",row,diagIndex[row],vals[diagIndexInMatrix],diagA,(LO)indsA.size());
670
671 for (LO l = 0; l < (LO)indsA.size(); l++) {
672 A_rowsum += valsA[l];
673 A_absrowsum += std::abs(valsA[l]);
674 }
675 for (LO l = 0; l < (LO)indsA.size(); l++)
676 F_rowsum += vals[index_start + l];
677 // printf(" : A rowsum = %8.2e |A| rowsum = %8.2e rowsum = %8.2e\n",A_rowsum,A_absrowsum,F_rowsum);
679 // printf(" Avals =");
680 // for(LO l = 0; l < (LO)indsA.size(); l++)
681 // printf("%d(%8.2e)[%d] ",(LO)indsA[l],valsA[l],(LO)l);
682 // printf("\n");
683 // printf(" Fvals =");
684 // for(LO l = 0; l < (LO)indsA.size(); l++)
685 // if(vals[index_start+l] != ZERO)
686 // printf("%d(%8.2e)[%d] ",(LO)indsA[l],vals[index_start+l],(LO)l);
687 }
688 }
689 // Don't know what to do, so blitz the row and dump a one on the diagonal
690 for (size_t l = rowptr[row]; l < rowptr[row + 1]; l++) {
691 vals[l] = ZERO;
692 }
693 vals[diagIndexInMatrix] = TST::one();
694 numFixedDiags++;
695 }
696 } else {
697 GetOStream(Runtime0) << "WARNING: Row " << row << " has no diagonal " << std::endl;
698 }
699 } /*end row "numRows" loop"*/
700 }
701
702 // Copy all the goop out
703 for (LO row = 0; row < (LO)numRows; row++) {
704 filteredA.replaceLocalValues(row, inds(rowptr[row], rowptr[row + 1] - rowptr[row]), vals(rowptr[row], rowptr[row + 1] - rowptr[row]));
705 }
706 if (use_spread_lumping) ExperimentalLumping(A, filteredA, DdomAllowGrowthRate, DdomCap);
707
// Report global drop/fix statistics.
708 size_t g_newDrops = 0, g_oldDrops = 0, g_fixedDiags = 0;
709
710 MueLu_sumAll(A.getRowMap()->getComm(), numNewDrops, g_newDrops);
711 MueLu_sumAll(A.getRowMap()->getComm(), numOldDrops, g_oldDrops);
712 MueLu_sumAll(A.getRowMap()->getComm(), numFixedDiags, g_fixedDiags);
713 GetOStream(Runtime0) << "Filtering out " << g_newDrops << " edges, in addition to the " << g_oldDrops << " edges dropped earlier" << std::endl;
714 GetOStream(Runtime0) << "Fixing " << g_fixedDiags << " zero diagonal values" << std::endl;
715}
716
717// fancy lumping trying to not just move everything to the diagonal but to also consider moving
718// some lumping to the kept off-diagonals. We basically aim to not increase the diagonal
719// dominance in a row. In particular, the goal is that row i satisfies
720//
721// lumpedDiagDomMeasure_i <= rho2
722// or
723// lumpedDiagDomMeasure <= rho*unlumpedDiagDomMeasure
724//
725// NOTE: THIS CODE assumes direct access to a row. See comments above concerning
726// ASSUME_DIRECT_ACCESS_TO_ROW
727//
728template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
730 ExperimentalLumping(const Matrix& A, Matrix& filteredA, double irho, double irho2) const {
731 using TST = typename Teuchos::ScalarTraits<SC>;
732 SC zero = TST::zero();
733 SC one = TST::one();
734
735 ArrayView<const LO> inds;
736 ArrayView<const SC> vals;
737 ArrayView<const LO> finds;
738 ArrayView<SC> fvals;
739
740 SC PosOffSum, NegOffSum, PosOffDropSum, NegOffDropSum;
741 SC diag, gamma, alpha;
742 LO NumPosKept, NumNegKept;
743
744 SC noLumpDdom;
745 SC numer, denom;
746 SC PosFilteredSum, NegFilteredSum;
747 SC Target;
748
749 SC rho = as<Scalar>(irho);
750 SC rho2 = as<Scalar>(irho2);
751
752 for (LO row = 0; row < (LO)A.getRowMap()->getLocalNumElements(); row++) {
753 noLumpDdom = as<Scalar>(10000.0); // only used if diagonal is zero
754 // the whole idea sort of breaks down
755 // when the diagonal is zero. In particular,
756 // the old diag dominance ratio is infinity
757 // ... so what do we want for the new ddom
758 // ratio. Do we want to allow the diagonal
759 // to go negative, just to have a better ddom
760 // ratio? This current choice essentially
761 // changes 'Target' to a large number
762 // meaning that we will allow the new
763 // ddom number to be fairly large (because
764 // the old one was infinity)
765
766 ArrayView<const SC> tvals;
767 A.getLocalRowView(row, inds, vals);
768 size_t nnz = inds.size();
769 if (nnz == 0) continue;
770 filteredA.getLocalRowView(row, finds, tvals); // assume 2 getLocalRowView()s
771 // have things in same order
772 fvals = ArrayView<SC>(const_cast<SC*>(tvals.getRawPtr()), nnz);
773
774 LO diagIndex = -1, fdiagIndex = -1;
775
776 PosOffSum = zero;
777 NegOffSum = zero;
778 PosOffDropSum = zero;
779 NegOffDropSum = zero;
780 diag = zero;
781 NumPosKept = 0;
782 NumNegKept = 0;
783
784 // first record diagonal, offdiagonal sums and off diag dropped sums
785 for (size_t j = 0; j < nnz; j++) {
786 if (inds[j] == row) {
787 diagIndex = j;
788 diag = vals[j];
789 } else { // offdiagonal
790 if (TST::real(vals[j]) > TST::real(zero))
791 PosOffSum += vals[j];
792 else
793 NegOffSum += vals[j];
794 }
795 }
796 PosOffDropSum = PosOffSum;
797 NegOffDropSum = NegOffSum;
798 NumPosKept = 0;
799 NumNegKept = 0;
800 LO j = 0;
801 for (size_t jj = 0; jj < (size_t)finds.size(); jj++) {
802 while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
803 // inds ... but perhaps has some entries missing
804 if (finds[jj] == row)
805 fdiagIndex = jj;
806 else {
807 if (TST::real(vals[j]) > TST::real(zero)) {
808 PosOffDropSum -= fvals[jj];
809 if (TST::real(fvals[jj]) != TST::real(zero)) NumPosKept++;
810 } else {
811 NegOffDropSum -= fvals[jj];
812 if (TST::real(fvals[jj]) != TST::real(zero)) NumNegKept++;
813 }
814 }
815 }
816
817 // measure of diagonal dominance if no lumping is done.
818 if (TST::magnitude(diag) != TST::magnitude(zero))
819 noLumpDdom = (PosOffSum - NegOffSum) / diag;
820
821 // Target is an acceptable diagonal dominance ratio
822 // which should really be larger than 1
823
824 Target = rho * noLumpDdom;
825 if (TST::magnitude(Target) <= TST::magnitude(rho)) Target = rho2;
826
827 PosFilteredSum = PosOffSum - PosOffDropSum;
828 NegFilteredSum = NegOffSum - NegOffDropSum;
    // Note: PosFilteredSum is not equal to the sum of the
830 // positive entries after lumping. It just reflects the
831 // pos offdiag sum of the filtered matrix before lumping
832 // and does not account for negative dropped terms lumped
833 // to the positive kept terms.
834
835 // dropped positive offdiags always go to the diagonal as these
836 // always improve diagonal dominance.
837
838 diag += PosOffDropSum;
839
840 // now lets work on lumping dropped negative offdiags
841 gamma = -NegOffDropSum - PosFilteredSum;
842
843 if (TST::real(gamma) < TST::real(zero)) {
844 // the total amount of negative dropping is less than PosFilteredSum,
845 // so we can distribute this dropping to pos offdiags. After lumping
846 // the sum of the pos offdiags is just -gamma so we just assign pos
847 // offdiags proportional to vals[j]/PosFilteredSum
848 // Note: in this case the diagonal is not changed as all lumping
849 // occurs to the pos offdiags
850
851 if (fdiagIndex != -1) fvals[fdiagIndex] = diag;
852 j = 0;
853 for (LO jj = 0; jj < (LO)finds.size(); jj++) {
854 while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
855 // inds ... but perhaps has some entries missing
856 if ((j != diagIndex) && (TST::real(vals[j]) > TST::real(zero)) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)))
857 fvals[jj] = -gamma * (vals[j] / PosFilteredSum);
858 }
859 } else {
860 // So there are more negative values that need lumping than kept
861 // positive offdiags. Meaning there is enough negative lumping to
862 // completely clear out all pos offdiags. If we lump all negs
863 // to pos off diags, we'd actually change them to negative. We
864 // only do this if we are desperate. Otherwise, we'll clear out
865 // all the positive kept offdiags and try to lump the rest
866 // somewhere else. We defer the clearing of pos off diags
867 // to see first if we are going to be desperate.
868
869 bool flipPosOffDiagsToNeg = false;
870
871 // Even if we lumped by zeroing positive offdiags, we are still
872 // going to have more lumping to distribute to either
873 // 1) the diagonal
874 // 2) the kept negative offdiags
875 // 3) the kept positive offdiags (desperate)
876
        // Let's first consider lumping the remaining neg offdiag stuff
878 // to the diagonal ... if this does not increase the diagonal
879 // dominance ratio too much (given by rho).
880
881 if ((TST::real(diag) > TST::real(gamma)) &&
882 (TST::real((-NegFilteredSum) / (diag - gamma)) <= TST::real(Target))) {
          // 1st if term above ensures that resulting diagonal (=diag-gamma)
          // is positive. The left side of 2nd term is the diagonal dominance
885 // if we lump the remaining stuff (gamma) to the diagonal. Recall,
886 // that now there are no positive off-diags so the sum(abs(offdiags))
887 // is just the negative of NegFilteredSum
888
889 if (fdiagIndex != -1) fvals[fdiagIndex] = diag - gamma;
890 } else if (NumNegKept > 0) {
891 // need to do some lumping to neg offdiags to avoid a large
892 // increase in diagonal dominance. We first compute alpha
893 // which measures how much gamma should go to the
894 // negative offdiags. The rest will go to the diagonal
895
896 numer = -NegFilteredSum - Target * (diag - gamma);
897 denom = gamma * (Target - TST::one());
898
899 // make sure that alpha is between 0 and 1 ... and that it doesn't
900 // result in a sign flip
901 // Note: when alpha is set to 1, then the diagonal is not modified
902 // and the negative offdiags just get shifted from those
          //       removed and those kept, meaning that the diagonal dominance
904 // should be the same as before
905 //
906 // can alpha be negative? It looks like denom should always
907 // be positive. The 'if' statement above
908 // Normally, diag-gamma should also be positive (but if it
          // is negative then numer is guaranteed to be positive).
910 // look at the 'if' above,
911 // if (( TST::real(diag) > TST::real(gamma)) &&
912 // ( TST::real((-NegFilteredSum)/(diag - gamma)) <= TST::real(Target))) {
913 //
914 // Should guarantee that numer is positive. This is obvious when
915 // the second condition is false. When it is the first condition that
          // is false, it follows that the two individual terms in the numer
917 // formula must be positive.
918
919 if (TST::magnitude(denom) < TST::magnitude(numer))
920 alpha = TST::one();
921 else
922 alpha = numer / denom;
923 if (TST::real(alpha) < TST::real(zero)) alpha = zero;
924 if (TST::real(diag) < TST::real((one - alpha) * gamma)) alpha = TST::one();
925
926 // first change the diagonal
927
928 if (fdiagIndex != -1) fvals[fdiagIndex] = diag - (one - alpha) * gamma;
929
930 // after lumping the sum of neg offdiags will be NegFilteredSum
931 // + alpha*gamma. That is the remaining negative entries altered
932 // by the percent (=alpha) of stuff (=gamma) that needs to be
933 // lumped after taking into account lumping to pos offdiags
934
935 // Do this by assigning a fraction of NegFilteredSum+alpha*gamma
936 // proportional to vals[j]/NegFilteredSum
937
938 SC temp = (NegFilteredSum + alpha * gamma) / NegFilteredSum;
939 j = 0;
940 for (LO jj = 0; jj < (LO)finds.size(); jj++) {
941 while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
942 // inds ... but perhaps has some entries missing
943 if ((jj != fdiagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) &&
944 (TST::real(vals[j]) < TST::real(zero)))
945 fvals[jj] = temp * vals[j];
946 }
947 } else { // desperate case
948 // So we don't have any kept negative offdiags ...
949
950 if (NumPosKept > 0) {
951 // luckily we can push this stuff to the pos offdiags
952 // which now makes them negative
953 flipPosOffDiagsToNeg = true;
954
955 j = 0;
956 for (LO jj = 0; jj < (LO)finds.size(); jj++) {
957 while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
958 // inds ... but perhaps has some entries missing
959 if ((j != diagIndex) && (TST::magnitude(fvals[jj]) != TST::magnitude(zero)) &&
960 (TST::real(vals[j]) > TST::real(zero)))
961 fvals[jj] = -gamma / ((SC)NumPosKept);
962 }
963 }
964 // else abandon rowsum preservation and do nothing
965 }
966 if (!flipPosOffDiagsToNeg) { // not desperate so we now zero out
967 // all pos terms including some
968 // not originally filtered
969 // but zeroed due to lumping
970 j = 0;
971 for (LO jj = 0; jj < (LO)finds.size(); jj++) {
972 while (inds[j] != finds[jj]) j++; // assumes that finds is in the same order as
973 // inds ... but perhaps has some entries missing
974 if ((jj != fdiagIndex) && (TST::real(vals[j]) > TST::real(zero))) fvals[jj] = zero;
975 }
976 }
977 } // positive gamma else
978
979 } // loop over all rows
980}
981
982} // namespace MueLu
983
984#endif // MUELU_FILTEREDAFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MUELU_FILTEREDAFACTORY_LOTS_OF_PRINTING
#define MueLu_sumAll(rcpComm, in, out)
Kokkos::View< local_ordinal_type *, device_type > LO_view
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
void BuildNewUsingRootStencil(const Matrix &A, const LWGraph &G, double dirichletThresh, Level &currentLevel, Matrix &filteredA, bool use_spread_lumping, double DdomAllowGrowthRate, double DdomCap) const
void BuildNew(const Matrix &A, const LWGraph &G, const bool lumping, double dirichletThresh, Matrix &filteredA) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &currentLevel) const
Input.
void BuildReuse(const Matrix &A, const LWGraph &G, const bool lumping, double dirichletThresh, Matrix &filteredA) const
void ExperimentalLumping(const Matrix &A, Matrix &filteredA, double rho, double rho2) const
void Build(Level &currentLevel) const
Build method.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
virtual const Teuchos::ParameterList & GetParameterList() const
static bool MapsAreNested(const Xpetra::Map< DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &rowMap, const Xpetra::Map< DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &colMap)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
Namespace for MueLu classes and methods.
@ Runtime0
One-liner description of what is happening.
void sort_and_unique(T &array)