MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
11#define MUELU_TENTATIVEPFACTORY_DEF_HPP
12
13#include <Xpetra_MapFactory.hpp>
14#include <Xpetra_Map.hpp>
15#include <Xpetra_CrsMatrix.hpp>
16#include <Xpetra_CrsGraphFactory.hpp>
17#include <Xpetra_Matrix.hpp>
18#include <Xpetra_MatrixMatrix.hpp>
19#include <Xpetra_MultiVector.hpp>
20#include <Xpetra_MultiVectorFactory.hpp>
21#include <Xpetra_VectorFactory.hpp>
22#include <Xpetra_Import.hpp>
23#include <Xpetra_ImportFactory.hpp>
24#include <Xpetra_CrsMatrixWrap.hpp>
25#include <Xpetra_StridedMap.hpp>
26#include <Xpetra_StridedMapFactory.hpp>
27#include <Xpetra_IO.hpp>
28
29#include "Xpetra_TpetraBlockCrsMatrix.hpp"
30
32
33#include "MueLu_Aggregates.hpp"
34#include "MueLu_AmalgamationInfo.hpp"
35#include "MueLu_MasterList.hpp"
36#include "MueLu_Monitor.hpp"
37#include "MueLu_PerfUtils.hpp"
38#include "MueLu_Utilities.hpp"
39
40namespace MueLu {
41
42template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
44 RCP<ParameterList> validParamList = rcp(new ParameterList());
45
46#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
47 SET_VALID_ENTRY("tentative: calculate qr");
48 SET_VALID_ENTRY("tentative: build coarse coordinates");
49 SET_VALID_ENTRY("tentative: constant column sums");
50#undef SET_VALID_ENTRY
51 validParamList->set<std::string>("Nullspace name", "Nullspace", "Name for the input nullspace");
52
53 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
54 validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
55 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
56 validParamList->set<RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
57 validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
58 validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
59 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
60 validParamList->set<RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
61
62 // Make sure we don't recursively validate options for the matrixmatrix kernels
63 ParameterList norecurse;
64 norecurse.disableRecursiveValidation();
65 validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
66
67 return validParamList;
68}
69
70template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
72 const ParameterList& pL = GetParameterList();
73 // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
74 std::string nspName = "Nullspace";
75 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
76
77 Input(fineLevel, "A");
78 Input(fineLevel, "Aggregates");
79 Input(fineLevel, nspName);
80 Input(fineLevel, "UnAmalgamationInfo");
81 Input(fineLevel, "CoarseMap");
82 if (fineLevel.GetLevelID() == 0 &&
83 fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
84 pL.get<bool>("tentative: build coarse coordinates")) { // and we want coordinates on other levels
85 bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
86 Input(fineLevel, "Coordinates");
87 } else if (bTransferCoordinates_) {
88 Input(fineLevel, "Coordinates");
89 }
90}
91
92template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94 return BuildP(fineLevel, coarseLevel);
95}
96
97template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
99 FactoryMonitor m(*this, "Build", coarseLevel);
100 typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
101 typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
102 typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
103
104 const ParameterList& pL = GetParameterList();
105 std::string nspName = "Nullspace";
106 if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
107
108 RCP<Matrix> Ptentative;
109 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
110 RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel, "Aggregates");
111 // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
112 // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
113 if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
114 Ptentative = Teuchos::null;
115 Set(coarseLevel, "P", Ptentative);
116 return;
117 }
118
119 RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
120 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
121 RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
122 RCP<RealValuedMultiVector> fineCoords;
124 fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
125 }
126
127 // FIXME: We should remove the NodeComm on levels past the threshold
128 if (fineLevel.IsAvailable("Node Comm")) {
129 RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel, "Node Comm");
130 Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
131 }
132
133 // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
134 TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
135 Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
136
137 RCP<MultiVector> coarseNullspace;
138 RCP<RealValuedMultiVector> coarseCoords;
139
141 //*** Create the coarse coordinates ***
142 // First create the coarse map and coarse multivector
143 ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
144 LO blkSize = 1;
145 if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
146 blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
147 }
148 GO indexBase = coarseMap->getIndexBase();
149 LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
150 Array<GO> nodeList(numCoarseNodes);
151 const int numDimensions = fineCoords->getNumVectors();
152
153 for (LO i = 0; i < numCoarseNodes; i++) {
154 nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
155 }
156 RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
157 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
158 nodeList,
159 indexBase,
160 fineCoords->getMap()->getComm());
161 coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
162
163 // Create overlapped fine coordinates to reduce global communication
164 RCP<RealValuedMultiVector> ghostedCoords;
165 if (aggregates->AggregatesCrossProcessors()) {
166 RCP<const Map> aggMap = aggregates->GetMap();
167 RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
168
169 ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
170 ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
171 } else {
172 ghostedCoords = fineCoords;
173 }
174
175 // Get some info about aggregates
176 int myPID = coarseCoordsMap->getComm()->getRank();
177 LO numAggs = aggregates->GetNumAggregates();
178 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
179 const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
180 const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
181
182 // Fill in coarse coordinates
183 for (int dim = 0; dim < numDimensions; ++dim) {
184 ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
185 ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
186
187 for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
188 if (procWinner[lnode] == myPID &&
189 lnode < fineCoordsData.size() &&
190 vertex2AggID[lnode] < coarseCoordsData.size() &&
191 Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
192 coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
193 }
194 }
195 for (LO agg = 0; agg < numAggs; agg++) {
196 coarseCoordsData[agg] /= aggSizes[agg];
197 }
198 }
199 }
200
201 if (!aggregates->AggregatesCrossProcessors()) {
202 if (Xpetra::Helpers<SC, LO, GO, NO>::isTpetraBlockCrs(A)) {
203 BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
204 } else {
205 BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
206 }
207 } else
208 BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
209
210 // If available, use striding information of fine level matrix A for range
211 // map and coarseMap as domain map; otherwise use plain range map of
212 // Ptent = plain range map of A for range map and coarseMap as domain map.
213 // NOTE:
214 // The latter is not really safe, since there is no striding information
215 // for the range map. This is not really a problem, since striding
216 // information is always available on the intermedium levels and the
217 // coarsest levels.
218 if (A->IsView("stridedMaps") == true)
219 Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
220 else
221 Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
222
224 Set(coarseLevel, "Coordinates", coarseCoords);
225 }
226 Set(coarseLevel, "Nullspace", coarseNullspace);
227 Set(coarseLevel, "P", Ptentative);
228
229 if (IsPrint(Statistics2)) {
230 RCP<ParameterList> params = rcp(new ParameterList());
231 params->set("printLoadBalancingInfo", true);
232 GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
233 }
234}
235
236template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
238 BuildPuncoupledBlockCrs(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
239 RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
240 /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
241 be generalized later, if we ever need to do so:
242 1) Null space dimension === block size of matrix: So no elasticity right now
243 2) QR is not supported: Under assumption #1, this shouldn't cause problems.
244 3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
245
246 These assumptions keep our code way simpler and still support the use cases we actually care about.
247 */
248
249 RCP<const Map> rowMap = A->getRowMap();
250 RCP<const Map> rangeMap = A->getRangeMap();
251 RCP<const Map> colMap = A->getColMap();
252 // const size_t numFinePointRows = rangeMap->getLocalNumElements();
253 const size_t numFineBlockRows = rowMap->getLocalNumElements();
254
255 typedef Teuchos::ScalarTraits<SC> STS;
256 // typedef typename STS::magnitudeType Magnitude;
257 const SC zero = STS::zero();
258 const SC one = STS::one();
259 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
260
261 const GO numAggs = aggregates->GetNumAggregates();
262 const size_t NSDim = fineNullspace->getNumVectors();
263 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
264
265 // Need to generate the coarse block map
266 // NOTE: We assume NSDim == block size here
267 // NOTE: We also assume that coarseMap has contiguous GIDs
268 // const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
269 const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
270 RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
271 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
272 numCoarseBlockRows,
273 coarsePointMap->getIndexBase(),
274 coarsePointMap->getComm());
275 // Sanity checking
276 const ParameterList& pL = GetParameterList();
277 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
278 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
279
280 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
281 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
282
283 // The aggregates use the amalgamated column map, which in this case is what we want
284
285 // Aggregates map is based on the amalgamated column map
286 // We can skip global-to-local conversion if LIDs in row map are
287 // same as LIDs in column map
288 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
289
290 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
291 // aggStart is a pointer into aggToRowMapLO
292 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
293 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
294 ArrayRCP<LO> aggStart;
295 ArrayRCP<LO> aggToRowMapLO;
296 ArrayRCP<GO> aggToRowMapGO;
297 if (goodMap) {
298 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
299 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
300 } else {
301 throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
302 }
303
304 coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
305
306 // Pull out the nullspace vectors so that we can have random access.
307 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
308 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
309 for (size_t i = 0; i < NSDim; i++) {
310 fineNS[i] = fineNullspace->getData(i);
311 if (coarsePointMap->getLocalNumElements() > 0)
312 coarseNS[i] = coarseNullspace->getDataNonConst(i);
313 }
314
315 // BlockCrs requires that we build the (block) graph first, so let's do that...
316 // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
317 // block non-zero per row in the matrix;
318 RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
319 ArrayRCP<size_t> iaPtent;
320 ArrayRCP<LO> jaPtent;
321 BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
322 ArrayView<size_t> ia = iaPtent();
323 ArrayView<LO> ja = jaPtent();
324
325 for (size_t i = 0; i < numFineBlockRows; i++) {
326 ia[i] = i;
327 ja[i] = INVALID;
328 }
329 ia[numCoarseBlockRows] = numCoarseBlockRows;
330
331 for (GO agg = 0; agg < numAggs; agg++) {
332 LO aggSize = aggStart[agg + 1] - aggStart[agg];
333 Xpetra::global_size_t offset = agg;
334
335 for (LO j = 0; j < aggSize; j++) {
336 // FIXME: Allow for bad maps
337 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
338 const size_t rowStart = ia[localRow];
339 ja[rowStart] = offset;
340 }
341 }
342
343 // Compress storage (remove all INVALID, which happen when we skip zeros)
344 // We do that in-place
345 size_t ia_tmp = 0, nnz = 0;
346 for (size_t i = 0; i < numFineBlockRows; i++) {
347 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
348 if (ja[j] != INVALID) {
349 ja[nnz] = ja[j];
350 nnz++;
351 }
352 ia_tmp = ia[i + 1];
353 ia[i + 1] = nnz;
354 }
355
356 if (rowMap->lib() == Xpetra::UseTpetra) {
357 // - Cannot resize for Epetra, as it checks for same pointers
358 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
359 // NOTE: these invalidate ja and val views
360 jaPtent.resize(nnz);
361 }
362
363 GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
364 BlockGraph->setAllIndices(iaPtent, jaPtent);
365
366 // Managing labels & constants for ESFC
367 {
368 RCP<ParameterList> FCparams;
369 if (pL.isSublist("matrixmatrix: kernel params"))
370 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
371 else
372 FCparams = rcp(new ParameterList);
373 // By default, we don't need global constants for TentativeP, but we do want it for the graph
374 // if we're printing statistics, so let's leave it on for now.
375 FCparams->set("compute global constants", FCparams->get("compute global constants", true));
376 std::string levelIDs = toString(levelID);
377 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
378 RCP<const Export> dummy_e;
379 RCP<const Import> dummy_i;
380 BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
381 }
382
383 // Now let's make a BlockCrs Matrix
384 // NOTE: Assumes block size== NSDim
385 RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
386 RCP<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> > P_tpetra = rcp_dynamic_cast<Xpetra::TpetraBlockCrsMatrix<SC, LO, GO, NO> >(P_xpetra);
387 if (P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
388 RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
389
391 // "no-QR" option //
393 // Local Q factor is just the fine nullspace support over the current aggregate.
394 // Local R factor is the identity.
395 // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
396 // NOTE: "goodMap" case only
397 Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
398 Teuchos::Array<LO> bcol(1);
399
400 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
401 for (LO agg = 0; agg < numAggs; agg++) {
402 bcol[0] = agg;
403 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
404 Xpetra::global_size_t offset = agg * NSDim;
405
406 // Process each row in the local Q factor
407 // NOTE: Blocks are in row-major order
408 for (LO j = 0; j < aggSize; j++) {
409 const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
410
411 for (size_t r = 0; r < NSDim; r++) {
412 LO localPointRow = localBlockRow * NSDim + r;
413 for (size_t c = 0; c < NSDim; c++)
414 block[r * NSDim + c] = fineNS[c][localPointRow];
415 }
416 // NOTE: Assumes columns==aggs and are ordered sequentially
417 P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
418
419 } // end aggSize
420
421 for (size_t j = 0; j < NSDim; j++)
422 coarseNS[j][offset + j] = one;
423
424 } // for (GO agg = 0; agg < numAggs; agg++)
425
426 Ptentative = P_wrap;
427}
428
429template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
431 BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
432 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
433 typedef Teuchos::ScalarTraits<SC> STS;
434 typedef typename STS::magnitudeType Magnitude;
435 const SC zero = STS::zero();
436 const SC one = STS::one();
437
438 // number of aggregates
439 GO numAggs = aggregates->GetNumAggregates();
440
441 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
442 // aggStart is a pointer into aggToRowMap
443 // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
444 // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
445 ArrayRCP<LO> aggStart;
446 ArrayRCP<GO> aggToRowMap;
447 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
448
449 // find size of the largest aggregate
450 LO maxAggSize = 0;
451 for (GO i = 0; i < numAggs; ++i) {
452 LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
453 if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
454 }
455
456 // dimension of fine level nullspace
457 const size_t NSDim = fineNullspace->getNumVectors();
458
459 // index base for coarse Dof map (usually 0)
460 GO indexBase = A->getRowMap()->getIndexBase();
461
462 const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
463 const RCP<const Map> uniqueMap = A->getDomainMap();
464 RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
465 RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
466 fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
467
468 // Pull out the nullspace vectors so that we can have random access.
469 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
470 for (size_t i = 0; i < NSDim; ++i)
471 fineNS[i] = fineNullspaceWithOverlap->getData(i);
472
473 // Allocate storage for the coarse nullspace.
474 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
475
476 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
477 for (size_t i = 0; i < NSDim; ++i)
478 if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
479
480 // This makes the rowmap of Ptent the same as that of A->
481 // This requires moving some parts of some local Q's to other processors
482 // because aggregates can span processors.
483 RCP<const Map> rowMapForPtent = A->getRowMap();
484 const Map& rowMapForPtentRef = *rowMapForPtent;
485
486 // Set up storage for the rows of the local Qs that belong to other processors.
487 // FIXME This is inefficient and could be done within the main loop below with std::vector's.
488 RCP<const Map> colMap = A->getColMap();
489
490 RCP<const Map> ghostQMap;
491 RCP<MultiVector> ghostQvalues;
492 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > ghostQcolumns;
493 RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
494 ArrayRCP<ArrayRCP<SC> > ghostQvals;
495 ArrayRCP<ArrayRCP<GO> > ghostQcols;
496 ArrayRCP<GO> ghostQrows;
497
498 Array<GO> ghostGIDs;
499 for (LO j = 0; j < numAggs; ++j) {
500 for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
501 if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
502 ghostGIDs.push_back(aggToRowMap[k]);
503 }
504 }
505 }
506 ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
507 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
508 ghostGIDs,
509 indexBase, A->getRowMap()->getComm()); // JG:Xpetra::global_size_t>?
510 // Vector to hold bits of Q that go to other processors.
511 ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
512 // Note that Epetra does not support MultiVectors templated on Scalar != double.
513 // So to work around this, we allocate an array of Vectors. This shouldn't be too
514 // expensive, as the number of Vectors is NSDim.
515 ghostQcolumns.resize(NSDim);
516 for (size_t i = 0; i < NSDim; ++i)
517 ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
518 ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
519 if (ghostQvalues->getLocalLength() > 0) {
520 ghostQvals.resize(NSDim);
521 ghostQcols.resize(NSDim);
522 for (size_t i = 0; i < NSDim; ++i) {
523 ghostQvals[i] = ghostQvalues->getDataNonConst(i);
524 ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
525 }
526 ghostQrows = ghostQrowNums->getDataNonConst(0);
527 }
528
529 // importer to handle moving Q
530 importer = ImportFactory::Build(ghostQMap, A->getRowMap());
531
532 // Dense QR solver
533 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
534
535 // Allocate temporary storage for the tentative prolongator.
536 Array<GO> globalColPtr(maxAggSize * NSDim, 0);
537 Array<LO> localColPtr(maxAggSize * NSDim, 0);
538 Array<SC> valPtr(maxAggSize * NSDim, 0.);
539
540 // Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
541 const Map& coarseMapRef = *coarseMap;
542
543 // For the 3-arrays constructor
544 ArrayRCP<size_t> ptent_rowptr;
545 ArrayRCP<LO> ptent_colind;
546 ArrayRCP<Scalar> ptent_values;
547
548 // Because ArrayRCPs are slow...
549 ArrayView<size_t> rowptr_v;
550 ArrayView<LO> colind_v;
551 ArrayView<Scalar> values_v;
552
553 // For temporary usage
554 Array<size_t> rowptr_temp;
555 Array<LO> colind_temp;
556 Array<Scalar> values_temp;
557
558 RCP<CrsMatrix> PtentCrs;
559
560 RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
561 PtentCrs = PtentCrsWrap->getCrsMatrix();
562 Ptentative = PtentCrsWrap;
563
564 //*****************************************************************
565 // Loop over all aggregates and calculate local QR decompositions.
566 //*****************************************************************
567 GO qctr = 0; // for indexing into Ptent data vectors
568 const Map& nonUniqueMapRef = *nonUniqueMap;
569
570 size_t total_nnz_count = 0;
571
572 for (GO agg = 0; agg < numAggs; ++agg) {
573 LO myAggSize = aggStart[agg + 1] - aggStart[agg];
574 // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
575 // "localQR" (in column major format) for the QR routine.
576 Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
577 for (size_t j = 0; j < NSDim; ++j) {
578 bool bIsZeroNSColumn = true;
579 for (LO k = 0; k < myAggSize; ++k) {
580 // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
581 // fineNS[j][n] is the nth entry in the jth NS vector
582 try {
583 SC nsVal = fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])]; // extract information from fine level NS
584 localQR(k, j) = nsVal;
585 if (nsVal != zero) bIsZeroNSColumn = false;
586 } catch (...) {
587 GetOStream(Runtime1, -1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
588 GetOStream(Runtime1, -1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
589 GetOStream(Runtime1, -1) << "(local?) aggId=" << agg << std::endl;
590 GetOStream(Runtime1, -1) << "aggSize=" << myAggSize << std::endl;
591 GetOStream(Runtime1, -1) << "agg DOF=" << k << std::endl;
592 GetOStream(Runtime1, -1) << "NS vector j=" << j << std::endl;
593 GetOStream(Runtime1, -1) << "j*myAggSize + k = " << j * myAggSize + k << std::endl;
594 GetOStream(Runtime1, -1) << "aggToRowMap[" << agg << "][" << k << "] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
595 GetOStream(Runtime1, -1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] << " is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
596 GetOStream(Runtime1, -1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
597 GetOStream(Runtime1, -1) << "fineNS...=" << fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
598 GetOStream(Errors, -1) << "caught an error!" << std::endl;
599 }
600 } // for (LO k=0 ...
601 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
602 } // for (LO j=0 ...
603
604 Xpetra::global_size_t offset = agg * NSDim;
605
606 if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
607 // calculate QR decomposition (standard)
608 // R is stored in localQR (size: myAggSize x NSDim)
609
610 // Householder multiplier
611 SC tau = localQR(0, 0);
612
613 if (NSDim == 1) {
614 // Only one nullspace vector, so normalize by hand
615 Magnitude dtemp = 0;
616 for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
617 Magnitude tmag = STS::magnitude(localQR(k, 0));
618 dtemp += tmag * tmag;
619 }
620 dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
621 tau = localQR(0, 0);
622 localQR(0, 0) = dtemp;
623 } else {
624 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
625 qrSolver.factor();
626 }
627
628 // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
629 // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
630 // This stores the (offset+k)th entry only if it is local according to the coarseMap.
631 for (size_t j = 0; j < NSDim; ++j) {
632 for (size_t k = 0; k <= j; ++k) {
633 try {
634 if (coarseMapRef.isNodeLocalElement(offset + k)) {
635 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
636 }
637 } catch (...) {
638 GetOStream(Errors, -1) << "caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k << std::endl;
639 }
640 }
641 }
642
643 // Calculate Q, the tentative prolongator.
644 // The Lapack GEQRF call only works for myAggsize >= NSDim
645
646 if (NSDim == 1) {
647 // Only one nullspace vector, so calculate Q by hand
648 Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
649 localQR(0, 0) = tau;
650 dtemp = 1 / dtemp;
651 for (LocalOrdinal i = 0; i < myAggSize; ++i) {
652 localQR(i, 0) *= dtemp;
653 }
654 } else {
655 qrSolver.formQ();
656 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
657 for (size_t j = 0; j < NSDim; j++) {
658 for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
659 localQR(i, j) = (*qFactor)(i, j);
660 }
661 }
662 }
663
664 // end default case (myAggSize >= NSDim)
665 } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
666 // See comments for the uncoupled case
667
668 // R = extended (by adding identity rows) localQR
669 for (size_t j = 0; j < NSDim; j++)
670 for (size_t k = 0; k < NSDim; k++) {
671 TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset + k), Exceptions::RuntimeError,
672 "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k);
673
674 if (k < as<size_t>(myAggSize))
675 coarseNS[j][offset + k] = localQR(k, j);
676 else
677 coarseNS[j][offset + k] = (k == j ? one : zero);
678 }
679
680 // Q = I (rectangular)
681 for (size_t i = 0; i < as<size_t>(myAggSize); i++)
682 for (size_t j = 0; j < NSDim; j++)
683 localQR(i, j) = (j == i ? one : zero);
684 } // end else (special handling for 1pt aggregates)
685
686 // Process each row in the local Q factor. If the row is local to the current processor
687 // according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
688 // to be communicated later to the owning processor.
689 // FIXME -- what happens if maps are blocked?
690 for (GO j = 0; j < myAggSize; ++j) {
691 // This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
692 // If it is, the row is inserted. If not, the row number, columns, and values are saved in
693 // MultiVectors that will be sent to other processors.
694 GO globalRow = aggToRowMap[aggStart[agg] + j];
695
696 // TODO is the use of Xpetra::global_size_t below correct?
697 if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false) {
698 ghostQrows[qctr] = globalRow;
699 for (size_t k = 0; k < NSDim; ++k) {
700 ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
701 ghostQvals[k][qctr] = localQR(j, k);
702 }
703 ++qctr;
704 } else {
705 size_t nnz = 0;
706 for (size_t k = 0; k < NSDim; ++k) {
707 try {
708 if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
709 localColPtr[nnz] = agg * NSDim + k;
710 globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
711 valPtr[nnz] = localQR(j, k);
712 ++total_nnz_count;
713 ++nnz;
714 }
715 } catch (...) {
716 GetOStream(Errors, -1) << "caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
717 }
718 } // for (size_t k=0; k<NSDim; ++k)
719
720 try {
721 Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
722 } catch (...) {
723 GetOStream(Errors, -1) << "pid " << A->getRowMap()->getComm()->getRank()
724 << "caught error during Ptent row insertion, global row "
725 << globalRow << std::endl;
726 }
727 }
728 } // for (GO j=0; j<myAggSize; ++j)
729
730 } // for (LO agg=0; agg<numAggs; ++agg)
731
732 // ***********************************************************
733 // ************* end of aggregate-wise QR ********************
734 // ***********************************************************
735 GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
736 // Import ghost parts of Q factors and insert into Ptentative.
737 // First import just the global row numbers.
738 RCP<Xpetra::Vector<GO, LO, GO, Node> > targetQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(rowMapForPtent);
739 targetQrowNums->putScalar(-1);
740 targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
741 ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
742
743 // Now create map based on just the row numbers imported.
744 Array<GO> gidsToImport;
745 gidsToImport.reserve(targetQrows.size());
746 for (typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
747 if (*r > -1) {
748 gidsToImport.push_back(*r);
749 }
750 }
751 RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
752 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
753 gidsToImport, indexBase, A->getRowMap()->getComm());
754
755 // Import using the row numbers that this processor will receive.
756 importer = ImportFactory::Build(ghostQMap, reducedMap);
757
758 Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
759 for (size_t i = 0; i < NSDim; ++i) {
760 targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
761 targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
762 }
763 RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
764 targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
765
766 ArrayRCP<ArrayRCP<SC> > targetQvals;
767 ArrayRCP<ArrayRCP<GO> > targetQcols;
768 if (targetQvalues->getLocalLength() > 0) {
769 targetQvals.resize(NSDim);
770 targetQcols.resize(NSDim);
771 for (size_t i = 0; i < NSDim; ++i) {
772 targetQvals[i] = targetQvalues->getDataNonConst(i);
773 targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
774 }
775 }
776
777 valPtr = Array<SC>(NSDim, 0.);
778 globalColPtr = Array<GO>(NSDim, 0);
779 for (typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
780 if (targetQvalues->getLocalLength() > 0) {
781 for (size_t j = 0; j < NSDim; ++j) {
782 valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
783 globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
784 }
785 Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
786 } // if (targetQvalues->getLocalLength() > 0)
787 }
788
789 Ptentative->fillComplete(coarseMap, A->getDomainMap());
790}
791
792template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
794 BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
795 RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
796 RCP<const Map> rowMap = A->getRowMap();
797 RCP<const Map> colMap = A->getColMap();
798 const size_t numRows = rowMap->getLocalNumElements();
799
800 typedef Teuchos::ScalarTraits<SC> STS;
801 typedef typename STS::magnitudeType Magnitude;
802 const SC zero = STS::zero();
803 const SC one = STS::one();
804 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
805
806 const GO numAggs = aggregates->GetNumAggregates();
807 const size_t NSDim = fineNullspace->getNumVectors();
808 ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
809
810 // Sanity checking
811 const ParameterList& pL = GetParameterList();
812 const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
813 const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
814
815 TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
816 "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
817
818 // Aggregates map is based on the amalgamated column map
819 // We can skip global-to-local conversion if LIDs in row map are
820 // same as LIDs in column map
821 bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
822
823 // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
824 // aggStart is a pointer into aggToRowMapLO
825 // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
826 // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
827 ArrayRCP<LO> aggStart;
828 ArrayRCP<LO> aggToRowMapLO;
829 ArrayRCP<GO> aggToRowMapGO;
830 if (goodMap) {
831 amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
832 GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
833
834 } else {
835 amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
836 GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
837 << "using GO->LO conversion with performance penalty" << std::endl;
838 }
839 coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
840
841 // Pull out the nullspace vectors so that we can have random access.
842 ArrayRCP<ArrayRCP<const SC> > fineNS(NSDim);
843 ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
844 for (size_t i = 0; i < NSDim; i++) {
845 fineNS[i] = fineNullspace->getData(i);
846 if (coarseMap->getLocalNumElements() > 0)
847 coarseNS[i] = coarseNullspace->getDataNonConst(i);
848 }
849
850 size_t nnzEstimate = numRows * NSDim;
851
852 // Time to construct the matrix and fill in the values
853 Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
854 RCP<CrsMatrix> PtentCrs = toCrsMatrix(Ptentative);
855
856 ArrayRCP<size_t> iaPtent;
857 ArrayRCP<LO> jaPtent;
858 ArrayRCP<SC> valPtent;
859
860 PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
861
862 ArrayView<size_t> ia = iaPtent();
863 ArrayView<LO> ja = jaPtent();
864 ArrayView<SC> val = valPtent();
865
866 ia[0] = 0;
867 for (size_t i = 1; i <= numRows; i++)
868 ia[i] = ia[i - 1] + NSDim;
869
870 for (size_t j = 0; j < nnzEstimate; j++) {
871 ja[j] = INVALID;
872 val[j] = zero;
873 }
874
875 if (doQRStep) {
877 // Standard aggregate-wise QR //
879 for (GO agg = 0; agg < numAggs; agg++) {
880 LO aggSize = aggStart[agg + 1] - aggStart[agg];
881
882 Xpetra::global_size_t offset = agg * NSDim;
883
884 // Extract the piece of the nullspace corresponding to the aggregate, and
885 // put it in the flat array, "localQR" (in column major format) for the
886 // QR routine.
887 Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
888 if (goodMap) {
889 for (size_t j = 0; j < NSDim; j++)
890 for (LO k = 0; k < aggSize; k++)
891 localQR(k, j) = fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
892 } else {
893 for (size_t j = 0; j < NSDim; j++)
894 for (LO k = 0; k < aggSize; k++)
895 localQR(k, j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
896 }
897
898 // Test for zero columns
899 for (size_t j = 0; j < NSDim; j++) {
900 bool bIsZeroNSColumn = true;
901
902 for (LO k = 0; k < aggSize; k++)
903 if (localQR(k, j) != zero)
904 bIsZeroNSColumn = false;
905
906 TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
907 "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
908 }
909
910 // Calculate QR decomposition (standard)
911 // NOTE: Q is stored in localQR and R is stored in coarseNS
912 if (aggSize >= Teuchos::as<LO>(NSDim)) {
913 if (NSDim == 1) {
914 // Only one nullspace vector, calculate Q and R by hand
915 Magnitude norm = STS::magnitude(zero);
916 for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
917 norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
918 norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
919
920 // R = norm
921 coarseNS[0][offset] = norm;
922
923 // Q = localQR(:,0)/norm
924 for (LO i = 0; i < aggSize; i++)
925 localQR(i, 0) /= norm;
926
927 } else {
928 Teuchos::SerialQRDenseSolver<LO, SC> qrSolver;
929 qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
930 qrSolver.factor();
931
932 // R = upper triangular part of localQR
933 for (size_t j = 0; j < NSDim; j++)
934 for (size_t k = 0; k <= j; k++)
935 coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
936
937 // Calculate Q, the tentative prolongator.
938 // The Lapack GEQRF call only works for myAggsize >= NSDim
939 qrSolver.formQ();
940 Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
941 for (size_t j = 0; j < NSDim; j++)
942 for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
943 localQR(i, j) = (*qFactor)(i, j);
944 }
945
946 } else {
947 // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
948
949 // The local QR decomposition is not possible in the "overconstrained"
950 // case (i.e. number of columns in localQR > number of rows), which
951 // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
952 // is only possible for single node aggregates in structural mechanics.
953 // (Similar problems may arise in discontinuous Galerkin problems...)
954 // We bypass the QR decomposition and use an identity block in the
955 // tentative prolongator for the single node aggregate and transfer the
956 // corresponding fine level null space information 1-to-1 to the coarse
957 // level null space part.
958
959 // NOTE: The resulting tentative prolongation operator has
960 // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
961 // coarse level operator A. To deal with that one has the following
962 // options:
963 // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
964 // false) to add some identity block to the diagonal of the zero rows
965 // in the coarse level operator A, such that standard level smoothers
966 // can be used again.
967 // - Use special (projection-based) level smoothers, which can deal
968 // with singular matrices (very application specific)
969 // - Adapt the code below to avoid zero columns. However, we do not
970 // support a variable number of DOFs per node in MueLu/Xpetra which
971 // makes the implementation really hard.
972
973 // R = extended (by adding identity rows) localQR
974 for (size_t j = 0; j < NSDim; j++)
975 for (size_t k = 0; k < NSDim; k++)
976 if (k < as<size_t>(aggSize))
977 coarseNS[j][offset + k] = localQR(k, j);
978 else
979 coarseNS[j][offset + k] = (k == j ? one : zero);
980
981 // Q = I (rectangular)
982 for (size_t i = 0; i < as<size_t>(aggSize); i++)
983 for (size_t j = 0; j < NSDim; j++)
984 localQR(i, j) = (j == i ? one : zero);
985 }
986
987 // Process each row in the local Q factor
988 // FIXME: What happens if maps are blocked?
989 for (LO j = 0; j < aggSize; j++) {
990 LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
991
992 size_t rowStart = ia[localRow];
993 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
994 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
995 if (localQR(j, k) != zero) {
996 ja[rowStart + lnnz] = offset + k;
997 val[rowStart + lnnz] = localQR(j, k);
998 lnnz++;
999 }
1000 }
1001 }
1002 }
1003
1004 } else {
1005 GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1006 if (NSDim > 1)
1007 GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1009 // "no-QR" option //
1011 // Local Q factor is just the fine nullspace support over the current aggregate.
1012 // Local R factor is the identity.
1013 // TODO I have not implemented any special handling for aggregates that are too
1014 // TODO small to locally support the nullspace, as is done in the standard QR
1015 // TODO case above.
1016 if (goodMap) {
1017 for (GO agg = 0; agg < numAggs; agg++) {
1018 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1019 Xpetra::global_size_t offset = agg * NSDim;
1020
1021 // Process each row in the local Q factor
1022 // FIXME: What happens if maps are blocked?
1023 for (LO j = 0; j < aggSize; j++) {
1024 // TODO Here I do not check for a zero nullspace column on the aggregate.
1025 // as is done in the standard QR case.
1026
1027 const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1028
1029 const size_t rowStart = ia[localRow];
1030
1031 for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1032 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1033 SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1034 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1035 if (qr_jk != zero) {
1036 ja[rowStart + lnnz] = offset + k;
1037 val[rowStart + lnnz] = qr_jk;
1038 lnnz++;
1039 }
1040 }
1041 }
1042 for (size_t j = 0; j < NSDim; j++)
1043 coarseNS[j][offset + j] = one;
1044 } // for (GO agg = 0; agg < numAggs; agg++)
1045
1046 } else {
1047 for (GO agg = 0; agg < numAggs; agg++) {
1048 const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1049 Xpetra::global_size_t offset = agg * NSDim;
1050 for (LO j = 0; j < aggSize; j++) {
1051 const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1052
1053 const size_t rowStart = ia[localRow];
1054
1055 for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1056 // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1057 SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1058 if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1059 if (qr_jk != zero) {
1060 ja[rowStart + lnnz] = offset + k;
1061 val[rowStart + lnnz] = qr_jk;
1062 lnnz++;
1063 }
1064 }
1065 }
1066 for (size_t j = 0; j < NSDim; j++)
1067 coarseNS[j][offset + j] = one;
1068 } // for (GO agg = 0; agg < numAggs; agg++)
1069
1070 } // if (goodmap) else ...
1071
1072 } // if doQRStep ... else
1073
1074 // Compress storage (remove all INVALID, which happen when we skip zeros)
1075 // We do that in-place
1076 size_t ia_tmp = 0, nnz = 0;
1077 for (size_t i = 0; i < numRows; i++) {
1078 for (size_t j = ia_tmp; j < ia[i + 1]; j++)
1079 if (ja[j] != INVALID) {
1080 ja[nnz] = ja[j];
1081 val[nnz] = val[j];
1082 nnz++;
1083 }
1084 ia_tmp = ia[i + 1];
1085 ia[i + 1] = nnz;
1086 }
1087 if (rowMap->lib() == Xpetra::UseTpetra) {
1088 // - Cannot resize for Epetra, as it checks for same pointers
1089 // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1090 // NOTE: these invalidate ja and val views
1091 jaPtent.resize(nnz);
1092 valPtent.resize(nnz);
1093 }
1094
1095 GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1096
1097 PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1098
1099 // Managing labels & constants for ESFC
1100 RCP<ParameterList> FCparams;
1101 if (pL.isSublist("matrixmatrix: kernel params"))
1102 FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1103 else
1104 FCparams = rcp(new ParameterList);
1105 // By default, we don't need global constants for TentativeP
1106 FCparams->set("compute global constants", FCparams->get("compute global constants", false));
1107 std::string levelIDs = toString(levelID);
1108 FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
1109 RCP<const Export> dummy_e;
1110 RCP<const Import> dummy_i;
1111
1112 PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);
1113}
1114
1115} // namespace MueLu
1116
1117// TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1118
1119#define MUELU_TENTATIVEPFACTORY_SHORT
1120#endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
int GetLevelID() const
Return level number.
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
static bool MapsAreNested(const Xpetra::Map< DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &rowMap, const Xpetra::Map< DefaultLocalOrdinal, DefaultGlobalOrdinal, DefaultNode > &colMap)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
bool IsPrint(MsgType type, int thisProcRankOnly=-1) const
Find out whether we need to print out information for a specific message type.
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line).
@ Statistics2
Print even more statistics.
@ Runtime1
Description of what is happening (more verbose).
std::string toString(const T &what)
Little helper function to convert non-string types to strings.