MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
11#define MUELU_BLACKBOXPFACTORY_DEF_HPP
12
13#include <stdlib.h>
14#include <iomanip>
15
16// #include <Teuchos_LAPACK.hpp>
17#include <Teuchos_SerialDenseMatrix.hpp>
18#include <Teuchos_SerialDenseVector.hpp>
19#include <Teuchos_SerialDenseSolver.hpp>
20
21#include <Xpetra_CrsMatrixUtils.hpp>
22#include <Xpetra_CrsMatrixWrap.hpp>
23#include <Xpetra_ImportFactory.hpp>
24#include <Xpetra_Matrix.hpp>
25#include <Xpetra_MapFactory.hpp>
26#include <Xpetra_MultiVectorFactory.hpp>
27#include <Xpetra_VectorFactory.hpp>
28
29#include <Xpetra_IO.hpp>
30
32
33#include "MueLu_Monitor.hpp"
34
35namespace MueLu {
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
39 RCP<ParameterList> validParamList = rcp(new ParameterList());
40
41 // Coarsen can come in two forms, either a single char that will be interpreted as an integer
42 // which is used as the coarsening rate in every spatial dimentions,
43 // or it can be a longer string that will then be interpreted as an array of integers.
44 // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
45 // is the default setting!
46 validParamList->set<std::string>("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
47 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
48 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
49 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
50 validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
51 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
52 validParamList->set<std::string>("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
53 validParamList->set<std::string>("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
54
55 return validParamList;
56}
57
58template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
60 Level& /* coarseLevel */)
61 const {
62 Input(fineLevel, "A");
63 Input(fineLevel, "Nullspace");
64 Input(fineLevel, "Coordinates");
65 // Request the global number of nodes per dimensions
66 if (fineLevel.GetLevelID() == 0) {
67 if (fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
68 fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
69 } else {
70 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
72 "gNodesPerDim was not provided by the user on level0!");
73 }
74 } else {
75 Input(fineLevel, "gNodesPerDim");
76 }
77
78 // Request the local number of nodes per dimensions
79 if (fineLevel.GetLevelID() == 0) {
80 if (fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
81 fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
82 } else {
83 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
85 "lNodesPerDim was not provided by the user on level0!");
86 }
87 } else {
88 Input(fineLevel, "lNodesPerDim");
89 }
90}
91
92template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94 Level& coarseLevel) const {
95 return BuildP(fineLevel, coarseLevel);
96}
97
98template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
100 Level& coarseLevel) const {
101 FactoryMonitor m(*this, "Build", coarseLevel);
102
103 // Get parameter list
104 const ParameterList& pL = GetParameterList();
105
106 // obtain general variables
107 RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
108 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
109 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > coordinates =
111 LO numDimensions = coordinates->getNumVectors();
112 LO BlkSize = A->GetFixedBlockSize();
113
114 // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
115 Array<GO> gFineNodesPerDir(3);
116 Array<LO> lFineNodesPerDir(3);
117 // Get the number of points in each direction
118 if (fineLevel.GetLevelID() == 0) {
119 gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
120 lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
121 } else {
122 // Loading global number of nodes per diretions
123 gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
124
125 // Loading local number of nodes per diretions
126 lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
127 }
128 for (LO i = 0; i < 3; ++i) {
129 if (gFineNodesPerDir[i] == 0) {
130 GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
131 << std::endl;
132 gFineNodesPerDir[i] = 1;
133 }
134 if (lFineNodesPerDir[i] == 0) {
135 GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
136 << std::endl;
137 lFineNodesPerDir[i] = 1;
138 }
139 }
140 GO gNumFineNodes = gFineNodesPerDir[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
141 LO lNumFineNodes = lFineNodesPerDir[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0];
142
143 // Get the coarsening rate
144 std::string coarsenRate = pL.get<std::string>("Coarsen");
145 Array<LO> coarseRate(3);
146 {
147 Teuchos::Array<LO> crates;
148 try {
149 crates = Teuchos::fromStringToArray<LO>(coarsenRate);
150 } catch (const Teuchos::InvalidArrayStringRepresentation& e) {
151 GetOStream(Errors, -1) << " *** Coarsen must be a string convertible into an array! *** "
152 << std::endl;
153 throw e;
154 }
155 TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
157 "Coarsen must have at least as many components as the number of"
158 " spatial dimensions in the problem.");
159 for (LO i = 0; i < 3; ++i) {
160 if (i < numDimensions) {
161 if (crates.size() == 1) {
162 coarseRate[i] = crates[0];
163 } else if (i < crates.size()) {
164 coarseRate[i] = crates[i];
165 } else {
166 GetOStream(Errors, -1) << " *** Coarsen must be at least as long as the number of"
167 " spatial dimensions! *** "
168 << std::endl;
170 " *** Coarsen must be at least as long as the number of"
171 " spatial dimensions! *** \n");
172 }
173 } else {
174 coarseRate[i] = 1;
175 }
176 }
177 } // End of scope for crates
178
179 // Get the stencil type used for discretization
180 const std::string stencilType = pL.get<std::string>("stencil type");
181 if (stencilType != "full" && stencilType != "reduced") {
182 GetOStream(Errors, -1) << " *** stencil type must be set to: full or reduced, any other value "
183 "is trated as an error! *** "
184 << std::endl;
185 throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
186 }
187
188 // Get the strategy for PDE systems
189 const std::string blockStrategy = pL.get<std::string>("block strategy");
190 if (blockStrategy != "coupled" && blockStrategy != "uncoupled") {
191 GetOStream(Errors, -1) << " *** block strategy must be set to: coupled or uncoupled, any other "
192 "value is trated as an error! *** "
193 << std::endl;
194 throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
195 }
196
197 GO gNumCoarseNodes = 0;
198 LO lNumCoarseNodes = 0;
199 Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
200 Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
201 Array<bool> ghostInterface(6);
202 Array<int> boundaryFlags(3);
203 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes(numDimensions);
204 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
205 for (LO dim = 0; dim < numDimensions; ++dim) {
206 fineNodes[dim] = coordinates->getData(dim)();
207 }
208
209 // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
210 RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
211
212 GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize, // inputs
213 gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
214 lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
215 gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
216 ghostedCoarseNodes);
217
218 // Create the MultiVector of coarse coordinates
219 Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
220 RCP<const Map> coarseCoordsMap = MapFactory::Build(lib,
221 gNumCoarseNodes,
222 coarseNodesGIDs.view(0, lNumCoarseNodes),
223 coordinates->getMap()->getIndexBase(),
224 coordinates->getMap()->getComm());
225 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseCoords(numDimensions);
226 for (LO dim = 0; dim < numDimensions; ++dim) {
227 coarseCoords[dim] = coarseNodes[dim]();
228 }
229 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > coarseCoordinates =
230 Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO>::Build(coarseCoordsMap, coarseCoords(),
231 numDimensions);
232
233 // Now create a new matrix: Aghost that contains all the data
234 // locally needed to compute the local part of the prolongator.
235 // Here assuming that all the coarse nodes o and fine nodes +
236 // are local then all the data associated with the coarse
237 // nodes O and the fine nodes * needs to be imported.
238 //
239 // *--*--*--*--*--*--*--*
240 // | | | | | | | |
241 // o--+--+--o--+--+--O--*
242 // | | | | | | | |
243 // +--+--+--+--+--+--*--*
244 // | | | | | | | |
245 // +--+--+--+--+--+--*--*
246 // | | | | | | | |
247 // o--+--+--o--+--+--O--*
248 // | | | | | | | |
249 // +--+--+--+--+--+--*--*
250 // | | | | | | | |
251 // *--*--*--*--*--*--*--*
252 // | | | | | | | |
253 // O--*--*--O--*--*--O--*
254 //
255 // Creating that local matrix is easy enough using proper range
256 // and domain maps to import data from A. Note that with this
257 // approach we reorder the local entries using the domain map and
258 // can subsequently compute the prolongator without reordering.
259 // As usual we need to be careful about any coarsening rate
260 // change at the boundary!
261
262 // The ingredients needed are an importer, a range map and a domain map
263 Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
264 nodeSteps[0] = 1;
265 nodeSteps[1] = gFineNodesPerDir[0];
266 nodeSteps[2] = gFineNodesPerDir[0] * gFineNodesPerDir[1];
267 Array<LO> glFineNodesPerDir(3);
268 GO startingGID = A->getRowMap()->getMinGlobalIndex();
269 for (LO dim = 0; dim < 3; ++dim) {
270 LO numCoarseNodes = 0;
271 if (dim < numDimensions) {
272 startingGID -= myOffset[dim] * nodeSteps[dim];
273 numCoarseNodes = lCoarseNodesPerDir[dim];
274 if (ghostInterface[2 * dim]) {
275 ++numCoarseNodes;
276 }
277 if (ghostInterface[2 * dim + 1]) {
278 ++numCoarseNodes;
279 }
280 if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
281 glFineNodesPerDir[dim] = (numCoarseNodes - 2) * coarseRate[dim] + endRate[dim] + 1;
282 } else {
283 glFineNodesPerDir[dim] = (numCoarseNodes - 1) * coarseRate[dim] + 1;
284 }
285 } else {
286 glFineNodesPerDir[dim] = 1;
287 }
288 }
289 ghostRowGIDs.resize(glFineNodesPerDir[0] * glFineNodesPerDir[1] * glFineNodesPerDir[2] * BlkSize);
290 for (LO k = 0; k < glFineNodesPerDir[2]; ++k) {
291 for (LO j = 0; j < glFineNodesPerDir[1]; ++j) {
292 for (LO i = 0; i < glFineNodesPerDir[0]; ++i) {
293 for (LO l = 0; l < BlkSize; ++l) {
294 ghostRowGIDs[(k * glFineNodesPerDir[1] * glFineNodesPerDir[0] + j * glFineNodesPerDir[0] + i) * BlkSize + l] = startingGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
295 }
296 }
297 }
298 }
299
300 // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
301 Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
302 Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
303 GO colMinGID = 0;
304 Array<LO> colRange(numDimensions);
305 dimStride[0] = 1;
306 for (int dim = 1; dim < numDimensions; ++dim) {
307 dimStride[dim] = dimStride[dim - 1] * gFineNodesPerDir[dim - 1];
308 }
309 {
310 GO tmp = startingGID;
311 for (int dim = numDimensions; dim > 0; --dim) {
312 startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
313 tmp = tmp % dimStride[dim - 1];
314
315 if (startingGlobalIndices[dim - 1] > 0) {
316 startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
317 }
318 if (startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]) {
319 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1];
320 } else {
321 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] - 1;
322 }
323 colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
324 colMinGID += startingColIndices[dim - 1] * dimStride[dim - 1];
325 }
326 }
327 ghostColGIDs.resize(colRange[0] * colRange[1] * colRange[2] * BlkSize);
328 for (LO k = 0; k < colRange[2]; ++k) {
329 for (LO j = 0; j < colRange[1]; ++j) {
330 for (LO i = 0; i < colRange[0]; ++i) {
331 for (LO l = 0; l < BlkSize; ++l) {
332 ghostColGIDs[(k * colRange[1] * colRange[0] + j * colRange[0] + i) * BlkSize + l] = colMinGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
333 }
334 }
335 }
336 }
337
338 RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
339 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
340 ghostRowGIDs(),
341 A->getRowMap()->getIndexBase(),
342 A->getRowMap()->getComm());
343 RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
344 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
345 ghostColGIDs(),
346 A->getRowMap()->getIndexBase(),
347 A->getRowMap()->getComm());
348 RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO, GO, NO>::Build(A->getRowMap(),
349 ghostedRowMap);
350 RCP<const Matrix> Aghost = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(A, *ghostImporter,
351 ghostedRowMap,
352 ghostedRowMap);
353
354 // Create the maps and data structures for the projection matrix
355 RCP<const Map> rowMapP = A->getDomainMap();
356
357 RCP<const Map> domainMapP;
358
359 RCP<const Map> colMapP;
360
361 // At this point we need to create the column map which is a delicate operation.
362 // The entries in that map need to be ordered as follows:
363 // 1) first owned entries ordered by LID
364 // 2) second order the remaining entries by PID
365 // 3) entries with the same remote PID are ordered by GID.
366 // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
367 // is the number of remote nodes. The sorting can be limited to remote nodes
368 // as the owned ones are alreaded ordered by LID!
369
370 LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
371 {
372 std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
373 for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
374 colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
375 if (ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
376 colMapOrdering[ind].PID = -1;
377 } else {
378 colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
379 }
380 colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
381 colMapOrdering[ind].lexiInd = ind;
382 }
383 std::sort(colMapOrdering.begin(), colMapOrdering.end(),
384 [](NodeID a, NodeID b) -> bool {
385 return ((a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)));
386 });
387
388 colGIDs.resize(BlkSize * lNumGhostedNodes);
389 for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
390 // Save the permutation calculated to go from Lexicographic indexing to column map indexing
391 ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
392 for (LO dof = 0; dof < BlkSize; ++dof) {
393 colGIDs[BlkSize * ind + dof] = BlkSize * colMapOrdering[ind].GID + dof;
394 }
395 }
396 domainMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
397 BlkSize * gNumCoarseNodes,
398 colGIDs.view(0, BlkSize * lNumCoarseNodes),
399 rowMapP->getIndexBase(),
400 rowMapP->getComm());
401 colMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
402 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
403 colGIDs.view(0, colGIDs.size()),
404 rowMapP->getIndexBase(),
405 rowMapP->getComm());
406 } // End of scope for colMapOrdering and colGIDs
407
408 std::vector<size_t> strideInfo(1);
409 strideInfo[0] = BlkSize;
410 RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO, GO, NO>::Build(domainMapP,
411 strideInfo);
412
413 GO gnnzP = 0;
414 LO lnnzP = 0;
415 // coarse points have one nnz per row
416 gnnzP += gNumCoarseNodes;
417 lnnzP += lNumCoarseNodes;
418 // add all other points multiplying by 2^numDimensions
419 gnnzP += (gNumFineNodes - gNumCoarseNodes) * std::pow(2, numDimensions);
420 lnnzP += (lNumFineNodes - lNumCoarseNodes) * std::pow(2, numDimensions);
421 // finally multiply by the number of dofs per node
422 gnnzP = gnnzP * BlkSize;
423 lnnzP = lnnzP * BlkSize;
424
425 // Create the matrix itself using the above maps
426 RCP<Matrix> P;
427 P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
428 RCP<CrsMatrix> PCrs = toCrsMatrix(P);
429
430 ArrayRCP<size_t> iaP;
431 ArrayRCP<LO> jaP;
432 ArrayRCP<SC> valP;
433
434 PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
435
436 ArrayView<size_t> ia = iaP();
437 ArrayView<LO> ja = jaP();
438 ArrayView<SC> val = valP();
439 ia[0] = 0;
440
441 LO numCoarseElements = 1;
442 Array<LO> lCoarseElementsPerDir(3);
443 for (LO dim = 0; dim < numDimensions; ++dim) {
444 lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
445 if (ghostInterface[2 * dim]) {
446 ++lCoarseElementsPerDir[dim];
447 }
448 if (!ghostInterface[2 * dim + 1]) {
449 --lCoarseElementsPerDir[dim];
450 }
451 numCoarseElements = numCoarseElements * lCoarseElementsPerDir[dim];
452 }
453
454 for (LO dim = numDimensions; dim < 3; ++dim) {
455 lCoarseElementsPerDir[dim] = 1;
456 }
457
458 // Loop over the coarse elements
459 Array<int> elementFlags(3);
460 Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
461 Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
462 const int numCoarseNodesInElement = std::pow(2, numDimensions);
463 const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
464 const int numRowsPerPoint = BlkSize;
465 for (elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
466 for (elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
467 for (elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
468 elementFlags[0] = 0;
469 elementFlags[1] = 0;
470 elementFlags[2] = 0;
471 for (int dim = 0; dim < 3; ++dim) {
472 // Detect boundary conditions on the element and set corresponding flags.
473 if (elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
474 elementFlags[dim] = boundaryFlags[dim];
475 } else if (elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
476 elementFlags[dim] += 1;
477 } else if ((elemInds[dim] == lCoarseElementsPerDir[dim] - 1) && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
478 elementFlags[dim] += 2;
479 } else {
480 elementFlags[dim] = 0;
481 }
482
483 // Compute the number of nodes in the current element.
484 if (dim < numDimensions) {
485 if ((elemInds[dim] == lCoarseElementsPerDir[dim]) && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
486 elementNodesPerDir[dim] = endRate[dim] + 1;
487 } else {
488 elementNodesPerDir[dim] = coarseRate[dim] + 1;
489 }
490 } else {
491 elementNodesPerDir[dim] = 1;
492 }
493
494 // Get the lowest tuple of the element using the ghosted local coordinate system
495 glElementRefTuple[dim] = elemInds[dim] * coarseRate[dim];
496 glElementRefTupleCG[dim] = elemInds[dim];
497 }
498
499 // Now get the column map indices corresponding to the dofs associated with the current
500 // element's coarse nodes.
501 for (typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
502 glElementCoarseNodeCG[elem] = glElementRefTupleCG[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
503 }
504 glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
505 glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
506 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
507 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
508
509 glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
510 glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
511 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
512 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
513
514 glElementCoarseNodeCG[1] += 1;
515 glElementCoarseNodeCG[3] += 1;
516 glElementCoarseNodeCG[5] += 1;
517 glElementCoarseNodeCG[7] += 1;
518
519 LO numNodesInElement = elementNodesPerDir[0] * elementNodesPerDir[1] * elementNodesPerDir[2];
520 // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
521 // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
522
523 // Compute the element prolongator
524 Teuchos::SerialDenseMatrix<LO, SC> Pi, Pf, Pe;
525 Array<LO> dofType(numNodesInElement * BlkSize), lDofInd(numNodesInElement * BlkSize);
526 ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
527 numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
528 lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
529 blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
530 Pi, Pf, Pe, dofType, lDofInd);
531
532 // Find ghosted LID associated with nodes in the element and eventually which of these
533 // nodes are ghosts, this information is used to fill the local prolongator.
534 Array<LO> lNodeLIDs(numNodesInElement);
535 {
536 Array<LO> lNodeTuple(3), nodeInd(3);
537 for (nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
538 for (nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
539 for (nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
540 int stencilLength = 0;
541 if ((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
542 (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
543 (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
544 stencilLength = 1;
545 } else {
546 stencilLength = std::pow(2, numDimensions);
547 }
548 LO nodeElementInd = nodeInd[2] * elementNodesPerDir[1] * elementNodesPerDir[1] + nodeInd[1] * elementNodesPerDir[0] + nodeInd[0];
549 for (int dim = 0; dim < 3; ++dim) {
550 lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
551 }
552 if (lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
553 lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
554 lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
555 lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
556 // This flags the ghosts nodes used for prolongator calculation but for which
557 // we do not own the row, hence we won't fill these values on this rank.
558 lNodeLIDs[nodeElementInd] = -1;
559 } else if ((nodeInd[0] == 0 && elemInds[0] != 0) ||
560 (nodeInd[1] == 0 && elemInds[1] != 0) ||
561 (nodeInd[2] == 0 && elemInds[2] != 0)) {
562 // This flags nodes that are owned but common to two coarse elements and that
563 // were already filled by another element, we don't want to fill them twice so
564 // we skip them
565 lNodeLIDs[nodeElementInd] = -2;
566 } else {
567 // The remaining nodes are locally owned and we need to fill the coresponding
568 // rows of the prolongator
569
570 // First we need to find which row needs to be filled
571 lNodeLIDs[nodeElementInd] = lNodeTuple[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0] + lNodeTuple[1] * lFineNodesPerDir[0] + lNodeTuple[0];
572
573 // We also compute the row offset using arithmetic to ensure that we can loop
574 // easily over the nodes in a macro-element as well as facilitate on-node
575 // parallelization. The node serial code could be rewritten with two loops over
576 // the local part of the mesh to avoid the costly integer divisions...
577 Array<LO> refCoarsePointTuple(3);
578 for (int dim = 2; dim > -1; --dim) {
579 if (dim == 0) {
580 refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
581 if (myOffset[dim] == 0) {
582 ++refCoarsePointTuple[dim];
583 }
584 } else {
585 // Note: no need for magnitudeType here, just use double because these things are LO's
586 refCoarsePointTuple[dim] =
587 std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim]);
588 }
589 if ((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {
590 break;
591 }
592 }
593 const LO numCoarsePoints = refCoarsePointTuple[0] + refCoarsePointTuple[1] * lCoarseNodesPerDir[0] + refCoarsePointTuple[2] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0];
594 const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
595
596 // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
597 // fill row 'n' to 'n+BlkSize'.
598 size_t rowPtr = (numFinePoints - numCoarsePoints) * numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode + numCoarsePoints * numRowsPerPoint;
599 if (dofType[nodeElementInd * BlkSize] == 0) {
600 // Check if current node is a coarse point
601 rowPtr = rowPtr - numRowsPerPoint;
602 } else {
603 rowPtr = rowPtr - numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode;
604 }
605
606 for (int dof = 0; dof < BlkSize; ++dof) {
607 // Now we loop over the stencil and fill the column indices and row values
608 int cornerInd = 0;
609 switch (dofType[nodeElementInd * BlkSize + dof]) {
610 case 0: // Corner node
611 if (nodeInd[2] == elementNodesPerDir[2] - 1) {
612 cornerInd += 4;
613 }
614 if (nodeInd[1] == elementNodesPerDir[1] - 1) {
615 cornerInd += 2;
616 }
617 if (nodeInd[0] == elementNodesPerDir[0] - 1) {
618 cornerInd += 1;
619 }
620 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + dof + 1;
621 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]] * BlkSize + dof;
622 val[rowPtr + dof] = 1.0;
623 break;
624
625 case 1: // Edge node
626 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
627 for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
628 if (blockStrategy == "coupled") {
629 for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
630 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
631 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
632 val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
633 ind1 * BlkSize + ind2);
634 }
635 } else if (blockStrategy == "uncoupled") {
636 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
637 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
638 val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
639 ind1 * BlkSize + dof);
640 }
641 }
642 break;
643
644 case 2: // Face node
645 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
646 for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
647 if (blockStrategy == "coupled") {
648 for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
649 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
650 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
651 val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
652 ind1 * BlkSize + ind2);
653 }
654 } else if (blockStrategy == "uncoupled") {
655 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
656 // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
657 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
658 val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
659 ind1 * BlkSize + dof);
660 }
661 }
662 break;
663
664 case 3: // Interior node
665 ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
666 for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
667 if (blockStrategy == "coupled") {
668 for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
669 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
670 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
671 val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
672 ind1 * BlkSize + ind2);
673 }
674 } else if (blockStrategy == "uncoupled") {
675 size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
676 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
677 val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
678 ind1 * BlkSize + dof);
679 }
680 }
681 break;
682 }
683 }
684 }
685 }
686 }
687 }
688 } // End of scopt for lNodeTuple and nodeInd
689 }
690 }
691 }
692
693 // Sort all row's column indicies and entries by LID
694 Xpetra::CrsMatrixUtils<SC, LO, GO, NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
695
696 // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
697 PCrs->setAllValues(iaP, jaP, valP);
698 PCrs->expertStaticFillComplete(domainMapP, rowMapP);
699
700 // set StridingInformation of P
701 if (A->IsView("stridedMaps") == true) {
702 P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
703 } else {
704 P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
705 }
706
707 // store the transfer operator and node coordinates on coarse level
708 Set(coarseLevel, "P", P);
709 Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
710 Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
711 Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
712}
713
714template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
716 GetGeometricData(RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> >& coordinates,
717 const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
718 const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
719 Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
720 Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
721 Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
722 Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
723 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
724 RCP<NodesIDs> ghostedCoarseNodes) const {
725 // This function is extracting the geometric information from the coordinates
726 // and creates the necessary data/formatting to perform locally the calculation
727 // of the pronlongator.
728 //
729 // inputs:
730
731 RCP<const Map> coordinatesMap = coordinates->getMap();
732 LO numDimensions = coordinates->getNumVectors();
733
734 // Using the coarsening rate and the fine level data,
735 // compute coarse level data
736
737 // Phase 1 //
738 // ------------------------------------------------------------------ //
739 // We first start by finding small informations on the mesh such as //
740 // the number of coarse nodes (local and global) and the number of //
741 // ghost nodes / the end rate of coarsening. //
742 // ------------------------------------------------------------------ //
743 GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
744 {
745 GO tmp;
746 gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
747 tmp = minGlobalIndex % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
748 gIndices[1] = tmp / gFineNodesPerDir[0];
749 gIndices[0] = tmp % gFineNodesPerDir[0];
750
751 myOffset[2] = gIndices[2] % coarseRate[2];
752 myOffset[1] = gIndices[1] % coarseRate[1];
753 myOffset[0] = gIndices[0] % coarseRate[0];
754 }
755
756 for (int dim = 0; dim < 3; ++dim) {
757 if (gIndices[dim] == 0) {
758 boundaryFlags[dim] += 1;
759 }
760 if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
761 boundaryFlags[dim] += 2;
762 }
763 }
764
765 // Check whether ghost nodes are needed in each direction
766 for (LO i = 0; i < numDimensions; ++i) {
767 if ((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
768 ghostInterface[2 * i] = true;
769 }
770 if (((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i]) && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
771 ghostInterface[2 * i + 1] = true;
772 }
773 }
774
775 for (LO i = 0; i < 3; ++i) {
776 if (i < numDimensions) {
777 lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
778 if (myOffset[i] == 0) {
779 ++lCoarseNodesPerDir[i];
780 }
781 gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
782 endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
783 if (endRate[i] == 0) {
784 ++gCoarseNodesPerDir[i];
785 endRate[i] = coarseRate[i];
786 }
787 } else {
788 // Most quantities need to be set to 1 for extra dimensions
789 // this is rather logical, an x-y plane is like a single layer
790 // of nodes in the z direction...
791 gCoarseNodesPerDir[i] = 1;
792 lCoarseNodesPerDir[i] = 1;
793 endRate[i] = 1;
794 }
795 }
796
797 gNumCoarseNodes = gCoarseNodesPerDir[0] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[2];
798 lNumCoarseNodes = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
799
800 // For each direction, determine how many ghost points are required.
801 LO lNumGhostNodes = 0;
802 Array<GO> startGhostedCoarseNode(3);
803 {
804 const int complementaryIndices[3][2] = {{1, 2}, {0, 2}, {0, 1}};
805 LO tmp = 0;
806 for (LO i = 0; i < 3; ++i) {
807 // The first branch of this if-statement will be used if the rank contains only one layer
808 // of nodes in direction i, that layer must also coincide with the boundary of the mesh
809 // and coarseRate[i] == endRate[i]...
810 if ((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
811 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
812 } else {
813 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
814 }
815 // First we load the number of locally owned coarse nodes
816 glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
817
818 // Check whether a face in direction i needs ghost nodes
819 if (ghostInterface[2 * i] || ghostInterface[2 * i + 1]) {
820 ++glCoarseNodesPerDir[i];
821 if (i == 0) {
822 tmp = lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
823 }
824 if (i == 1) {
825 tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[2];
826 }
827 if (i == 2) {
828 tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1];
829 }
830 }
831 // If both faces in direction i need nodes, double the number of ghost nodes
832 if (ghostInterface[2 * i] && ghostInterface[2 * i + 1]) {
833 ++glCoarseNodesPerDir[i];
834 tmp = 2 * tmp;
835 }
836 lNumGhostNodes += tmp;
837
838 // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
839 for (LO j = 0; j < 2; ++j) {
840 for (LO k = 0; k < 2; ++k) {
841 // Check if two adjoining faces need ghost nodes and then add their common edge
842 if (ghostInterface[2 * complementaryIndices[i][0] + j] && ghostInterface[2 * complementaryIndices[i][1] + k]) {
843 lNumGhostNodes += lCoarseNodesPerDir[i];
844 // Add corners if three adjoining faces need ghost nodes,
845 // but add them only once! Hence when i == 0.
846 if (ghostInterface[2 * i] && (i == 0)) {
847 lNumGhostNodes += 1;
848 }
849 if (ghostInterface[2 * i + 1] && (i == 0)) {
850 lNumGhostNodes += 1;
851 }
852 }
853 }
854 }
855 tmp = 0;
856 }
857 } // end of scope for tmp and complementaryIndices
858
859 const LO lNumGhostedNodes = glCoarseNodesPerDir[0] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[2];
860 ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
861 ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
862 ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
863 ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
864 ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
865
866 // We loop over all ghosted coarse nodes by increasing global lexicographic order
867 Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
868 LO currentIndex = -1;
869 for (ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
870 for (ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
871 for (ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
872 currentIndex = ijk[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + ijk[1] * glCoarseNodesPerDir[0] + ijk[0];
873 coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
874 coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0] * coarseRate[0];
875 if (coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
876 coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
877 }
878 coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
879 coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1] * coarseRate[1];
880 if (coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
881 coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
882 }
883 coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
884 coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2] * coarseRate[2];
885 if (coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
886 coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
887 }
888 GO myGID = 0, myCoarseGID = -1;
889 GO factor[3] = {};
890 factor[2] = gFineNodesPerDir[1] * gFineNodesPerDir[0];
891 factor[1] = gFineNodesPerDir[0];
892 factor[0] = 1;
893 for (int dim = 0; dim < 3; ++dim) {
894 if (dim < numDimensions) {
895 if (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim] < gFineNodesPerDir[dim] - 1) {
896 myGID += (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim]) * factor[dim];
897 } else {
898 myGID += (gIndices[dim] - myOffset[dim] + (ijk[dim] - 1) * coarseRate[dim] + endRate[dim]) * factor[dim];
899 }
900 }
901 }
902 myCoarseGID = coarseNodeCoarseIndices[0] + coarseNodeCoarseIndices[1] * gCoarseNodesPerDir[0] + coarseNodeCoarseIndices[2] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
903 ghostedCoarseNodes->GIDs[currentIndex] = myGID;
904 ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
905 }
906 }
907 }
908 coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
909 ghostedCoarseNodes->PIDs(),
910 ghostedCoarseNodes->LIDs());
911
912 // Phase 2 //
913 // ------------------------------------------------------------------ //
914 // Build a list of GIDs to import the required ghost nodes. //
915 // The ordering of the ghosts nodes will be as natural as possible, //
916 // i.e. it should follow the GID ordering of the mesh. //
917 // ------------------------------------------------------------------ //
918
919 // Saddly we have to more or less redo what was just done to figure out the value of
920 // lNumGhostNodes, there might be some optimization possibility here...
921 ghostGIDs.resize(lNumGhostNodes);
922 LO countGhosts = 0;
923 // Get the GID of the first point on the current partition.
924 GO startingGID = minGlobalIndex;
925 Array<GO> startingIndices(3);
926 // We still want ghost nodes even if have with a 0 offset,
927 // except when we are on a boundary
928 if (ghostInterface[4] && (myOffset[2] == 0)) {
929 if (gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
930 startingGID -= endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
931 } else {
932 startingGID -= coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
933 }
934 } else {
935 startingGID -= myOffset[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
936 }
937 if (ghostInterface[2] && (myOffset[1] == 0)) {
938 if (gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
939 startingGID -= endRate[1] * gFineNodesPerDir[0];
940 } else {
941 startingGID -= coarseRate[1] * gFineNodesPerDir[0];
942 }
943 } else {
944 startingGID -= myOffset[1] * gFineNodesPerDir[0];
945 }
946 if (ghostInterface[0] && (myOffset[0] == 0)) {
947 if (gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
948 startingGID -= endRate[0];
949 } else {
950 startingGID -= coarseRate[0];
951 }
952 } else {
953 startingGID -= myOffset[0];
954 }
955
956 { // scope for tmp
957 GO tmp;
958 startingIndices[2] = startingGID / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
959 tmp = startingGID % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
960 startingIndices[1] = tmp / gFineNodesPerDir[0];
961 startingIndices[0] = tmp % gFineNodesPerDir[0];
962 }
963
964 GO ghostOffset[3] = {0, 0, 0};
965 LO lengthZero = lCoarseNodesPerDir[0];
966 LO lengthOne = lCoarseNodesPerDir[1];
967 LO lengthTwo = lCoarseNodesPerDir[2];
968 if (ghostInterface[0]) {
969 ++lengthZero;
970 }
971 if (ghostInterface[1]) {
972 ++lengthZero;
973 }
974 if (ghostInterface[2]) {
975 ++lengthOne;
976 }
977 if (ghostInterface[3]) {
978 ++lengthOne;
979 }
980 if (ghostInterface[4]) {
981 ++lengthTwo;
982 }
983 if (ghostInterface[5]) {
984 ++lengthTwo;
985 }
986
987 // First check the bottom face as it will have the lowest GIDs
988 if (ghostInterface[4]) {
989 ghostOffset[2] = startingGID;
990 for (LO j = 0; j < lengthOne; ++j) {
991 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
992 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
993 } else {
994 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
995 }
996 for (LO k = 0; k < lengthZero; ++k) {
997 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
998 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
999 } else {
1000 ghostOffset[0] = k * coarseRate[0];
1001 }
1002 // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1003 // carefully.
1004 // This if statement is repeated each time a ghost node is selected.
1005 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1006 ++countGhosts;
1007 }
1008 }
1009 }
1010
1011 // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1012 // nodes located on these layers.
1013 for (LO i = 0; i < lengthTwo; ++i) {
1014 // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1015 // swept seperatly for efficiency.
1016 if (!((i == lengthTwo - 1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4])) {
1017 // Set the ghostOffset in direction 2 taking into account a possible endRate different
1018 // from the regular coarseRate.
1019 if ((i == lengthTwo - 1) && !ghostInterface[5]) {
1020 ghostOffset[2] = startingGID + ((i - 1) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1021 } else {
1022 ghostOffset[2] = startingGID + i * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1023 }
1024 for (LO j = 0; j < lengthOne; ++j) {
1025 if ((j == 0) && ghostInterface[2]) {
1026 for (LO k = 0; k < lengthZero; ++k) {
1027 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1028 if (k == 0) {
1029 ghostOffset[0] = endRate[0];
1030 } else {
1031 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1032 }
1033 } else {
1034 ghostOffset[0] = k * coarseRate[0];
1035 }
1036 // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1037 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1038 ++countGhosts;
1039 }
1040 } else if ((j == lengthOne - 1) && ghostInterface[3]) {
1041 // Set the ghostOffset in direction 1 taking into account a possible endRate different
1042 // from the regular coarseRate.
1043 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1044 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1045 } else {
1046 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1047 }
1048 for (LO k = 0; k < lengthZero; ++k) {
1049 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1050 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1051 } else {
1052 ghostOffset[0] = k * coarseRate[0];
1053 }
1054 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1055 ++countGhosts;
1056 }
1057 } else {
1058 // Set ghostOffset[1] for side faces sweep
1059 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1060 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1061 } else {
1062 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1063 }
1064
1065 // Set ghostOffset[0], ghostGIDs and countGhosts
1066 if (ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1067 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1068 ++countGhosts;
1069 }
1070 if (ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1071 if ((startingIndices[0] + (lengthZero - 1) * coarseRate[0]) > gFineNodesPerDir[0] - 1) {
1072 if (lengthZero > 2) {
1073 ghostOffset[0] = (lengthZero - 2) * coarseRate[0] + endRate[0];
1074 } else {
1075 ghostOffset[0] = endRate[0];
1076 }
1077 } else {
1078 ghostOffset[0] = (lengthZero - 1) * coarseRate[0];
1079 }
1080 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1081 ++countGhosts;
1082 }
1083 }
1084 }
1085 }
1086 }
1087
1088 // Finally check the top face
1089 if (ghostInterface[5]) {
1090 if (startingIndices[2] + (lengthTwo - 1) * coarseRate[2] + 1 > gFineNodesPerDir[2]) {
1091 ghostOffset[2] = startingGID + ((lengthTwo - 2) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1092 } else {
1093 ghostOffset[2] = startingGID + (lengthTwo - 1) * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1094 }
1095 for (LO j = 0; j < lengthOne; ++j) {
1096 if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1097 ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1098 } else {
1099 ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1100 }
1101 for (LO k = 0; k < lengthZero; ++k) {
1102 if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1103 ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1104 } else {
1105 ghostOffset[0] = k * coarseRate[0];
1106 }
1107 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1108 ++countGhosts;
1109 }
1110 }
1111 }
1112
1113 // Phase 3 //
1114 // ------------------------------------------------------------------ //
1115 // Final phase of this function, lists are being built to create the //
1116 // column and domain maps of the projection as well as the map and //
1117 // arrays for the coarse coordinates multivector. //
1118 // ------------------------------------------------------------------ //
1119
1120 Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1121 LO currentNode, offset2, offset1, offset0;
1122 // Find the GIDs of the coarse nodes on the partition.
1123 for (LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1124 if (myOffset[2] == 0) {
1125 offset2 = startingIndices[2] + myOffset[2];
1126 } else {
1127 if (startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1128 offset2 = startingIndices[2] + endRate[2];
1129 } else {
1130 offset2 = startingIndices[2] + coarseRate[2];
1131 }
1132 }
1133 if (offset2 + ind2 * coarseRate[2] > gFineNodesPerDir[2] - 1) {
1134 offset2 += (ind2 - 1) * coarseRate[2] + endRate[2];
1135 } else {
1136 offset2 += ind2 * coarseRate[2];
1137 }
1138 offset2 = offset2 * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1139
1140 for (LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1141 if (myOffset[1] == 0) {
1142 offset1 = startingIndices[1] + myOffset[1];
1143 } else {
1144 if (startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1145 offset1 = startingIndices[1] + endRate[1];
1146 } else {
1147 offset1 = startingIndices[1] + coarseRate[1];
1148 }
1149 }
1150 if (offset1 + ind1 * coarseRate[1] > gFineNodesPerDir[1] - 1) {
1151 offset1 += (ind1 - 1) * coarseRate[1] + endRate[1];
1152 } else {
1153 offset1 += ind1 * coarseRate[1];
1154 }
1155 offset1 = offset1 * gFineNodesPerDir[0];
1156 for (LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1157 offset0 = startingIndices[0];
1158 if (myOffset[0] == 0) {
1159 offset0 += ind0 * coarseRate[0];
1160 } else {
1161 offset0 += (ind0 + 1) * coarseRate[0];
1162 }
1163 if (offset0 > gFineNodesPerDir[0] - 1) {
1164 offset0 += endRate[0] - coarseRate[0];
1165 }
1166
1167 currentNode = ind2 * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + ind1 * lCoarseNodesPerDir[0] + ind0;
1168 gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1169 }
1170 }
1171 }
1172
1173 // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1174 // and the corresponding dofs that will need to be added to colMapP.
1175 colGIDs.resize(BlkSize * (lNumCoarseNodes + lNumGhostNodes));
1176 coarseNodesGIDs.resize(lNumCoarseNodes);
1177 for (LO i = 0; i < numDimensions; ++i) {
1178 coarseNodes[i].resize(lNumCoarseNodes);
1179 }
1180 GO fineNodesPerCoarseSlab = coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1181 GO fineNodesEndCoarseSlab = endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1182 GO fineNodesPerCoarsePlane = coarseRate[1] * gFineNodesPerDir[0];
1183 GO fineNodesEndCoarsePlane = endRate[1] * gFineNodesPerDir[0];
1184 GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
1185 GO gCoarseNodeOnCoarseGridGID;
1186 LO gInd[3], lCol;
1187 Array<int> ghostPIDs(lNumGhostNodes);
1188 Array<LO> ghostLIDs(lNumGhostNodes);
1189 Array<LO> ghostPermut(lNumGhostNodes);
1190 for (LO k = 0; k < lNumGhostNodes; ++k) {
1191 ghostPermut[k] = k;
1192 }
1193 coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1194 sh_sort_permute(ghostPIDs.begin(), ghostPIDs.end(), ghostPermut.begin(), ghostPermut.end());
1195
1196 { // scope for tmpInds, tmpVars and tmp.
1197 GO tmpInds[3], tmpVars[2];
1198 LO tmp;
1199 // Loop over the coarse nodes of the partition and add them to colGIDs
1200 // that will be used to construct the column and domain maps of P as well
1201 // as to construct the coarse coordinates map.
1202 // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1203 // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1204 LO col = 0;
1205 LO firstCoarseNodeInds[3], currentCoarseNode;
1206 for (LO dim = 0; dim < 3; ++dim) {
1207 if (myOffset[dim] == 0) {
1208 firstCoarseNodeInds[dim] = 0;
1209 } else {
1210 firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1211 }
1212 }
1213 Array<ArrayRCP<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
1214 for (LO dim = 0; dim < numDimensions; ++dim) {
1215 fineNodes[dim] = coordinates->getData(dim);
1216 }
1217 for (LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1218 for (LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1219 for (LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1220 col = k * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + j * lCoarseNodesPerDir[0] + i;
1221
1222 // Check for endRate
1223 currentCoarseNode = 0;
1224 if (firstCoarseNodeInds[0] + i * coarseRate[0] > lFineNodesPerDir[0] - 1) {
1225 currentCoarseNode += firstCoarseNodeInds[0] + (i - 1) * coarseRate[0] + endRate[0];
1226 } else {
1227 currentCoarseNode += firstCoarseNodeInds[0] + i * coarseRate[0];
1228 }
1229 if (firstCoarseNodeInds[1] + j * coarseRate[1] > lFineNodesPerDir[1] - 1) {
1230 currentCoarseNode += (firstCoarseNodeInds[1] + (j - 1) * coarseRate[1] + endRate[1]) * lFineNodesPerDir[0];
1231 } else {
1232 currentCoarseNode += (firstCoarseNodeInds[1] + j * coarseRate[1]) * lFineNodesPerDir[0];
1233 }
1234 if (firstCoarseNodeInds[2] + k * coarseRate[2] > lFineNodesPerDir[2] - 1) {
1235 currentCoarseNode += (firstCoarseNodeInds[2] + (k - 1) * coarseRate[2] + endRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1236 } else {
1237 currentCoarseNode += (firstCoarseNodeInds[2] + k * coarseRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1238 }
1239 // Load coordinates
1240 for (LO dim = 0; dim < numDimensions; ++dim) {
1241 coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1242 }
1243
1244 if ((endRate[2] != coarseRate[2]) && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1245 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1246 tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1247 } else {
1248 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1249 tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1250 }
1251 if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1252 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1253 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1254 } else {
1255 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1256 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1257 }
1258 if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1259 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1260 } else {
1261 tmpInds[0] = tmpVars[1] / coarseRate[0];
1262 }
1263 gInd[2] = col / (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1264 tmp = col % (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1265 gInd[1] = tmp / lCoarseNodesPerDir[0];
1266 gInd[0] = tmp % lCoarseNodesPerDir[0];
1267 lCol = gInd[2] * (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]) + gInd[1] * lCoarseNodesPerDir[0] + gInd[0];
1268 gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1269 coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1270 for (LO dof = 0; dof < BlkSize; ++dof) {
1271 colGIDs[BlkSize * lCol + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1272 }
1273 }
1274 }
1275 }
1276 // Now loop over the ghost nodes of the partition to add them to colGIDs
1277 // since they will need to be included in the column map of P
1278 for (col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1279 if ((endRate[2] != coarseRate[2]) && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1280 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1281 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1282 } else {
1283 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1284 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1285 }
1286 if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1287 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1288 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1289 } else {
1290 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1291 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1292 }
1293 if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1294 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1295 } else {
1296 tmpInds[0] = tmpVars[1] / coarseRate[0];
1297 }
1298 gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1299 for (LO dof = 0; dof < BlkSize; ++dof) {
1300 colGIDs[BlkSize * col + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1301 }
1302 }
1303 } // End of scope for tmpInds, tmpVars and tmp
1304
1305} // GetGeometricData()
1306
1307template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1309 ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1310 const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1311 const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1312 const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1313 const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1314 const Array<bool> ghostInterface, const Array<int> elementFlags,
1315 const std::string stencilType, const std::string /* blockStrategy */,
1316 const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1317 const Array<GO> /* colGIDs */,
1318 Teuchos::SerialDenseMatrix<LO, SC>& Pi, Teuchos::SerialDenseMatrix<LO, SC>& Pf,
1319 Teuchos::SerialDenseMatrix<LO, SC>& Pe, Array<LO>& dofType,
1320 Array<LO>& lDofInd) const {
1321 // First extract data from Aghost and move it to the corresponding dense matrix
1322 // This step requires to compute the number of nodes (resp dofs) in the current
1323 // coarse element, then allocate storage for local dense matrices, finally loop
1324 // over the dofs and extract the corresponding row in Aghost before dispactching
1325 // its data to the dense matrices.
1326 // At the same time we want to compute a mapping from the element indexing to
1327 // group indexing. The groups are the following: corner, edge, face and interior
1328 // nodes. We uses these groups to operate on the dense matrices but need to
1329 // store the nodes in their original order after groupd operations are completed.
1330 LO countInterior = 0, countFace = 0, countEdge = 0, countCorner = 0;
1331 Array<LO> collapseDir(numNodesInElement * BlkSize);
1332 for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1333 for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1334 for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1335 // Check for corner node
1336 if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1337 for (LO dof = 0; dof < BlkSize; ++dof) {
1338 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1339 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countCorner + dof;
1340 }
1341 ++countCorner;
1342
1343 // Check for edge node
1344 } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1345 for (LO dof = 0; dof < BlkSize; ++dof) {
1346 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1347 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countEdge + dof;
1348 if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) {
1349 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1350 } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1351 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1352 } else if ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1353 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1354 }
1355 }
1356 ++countEdge;
1357
1358 // Check for face node
1359 } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1360 for (LO dof = 0; dof < BlkSize; ++dof) {
1361 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1362 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countFace + dof;
1363 if (ke == 0 || ke == elementNodesPerDir[2] - 1) {
1364 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1365 } else if (je == 0 || je == elementNodesPerDir[1] - 1) {
1366 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1367 } else if (ie == 0 || ie == elementNodesPerDir[0] - 1) {
1368 collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1369 }
1370 }
1371 ++countFace;
1372
1373 // Otherwise it is an interior node
1374 } else {
1375 for (LO dof = 0; dof < BlkSize; ++dof) {
1376 dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 3;
1377 lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countInterior + dof;
1378 }
1379 ++countInterior;
1380 }
1381 }
1382 }
1383 }
1384
1385 LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1386 numInteriorNodes = (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1387 numFaceNodes = 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) + 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[2] - 2) + 2 * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1388 numEdgeNodes = 4 * (elementNodesPerDir[0] - 2) + 4 * (elementNodesPerDir[1] - 2) + 4 * (elementNodesPerDir[2] - 2);
1389 // Diagonal blocks
1390 Teuchos::SerialDenseMatrix<LO, SC> Aii(BlkSize * numInteriorNodes, BlkSize * numInteriorNodes);
1391 Teuchos::SerialDenseMatrix<LO, SC> Aff(BlkSize * numFaceNodes, BlkSize * numFaceNodes);
1392 Teuchos::SerialDenseMatrix<LO, SC> Aee(BlkSize * numEdgeNodes, BlkSize * numEdgeNodes);
1393 // Upper triangular blocks
1394 Teuchos::SerialDenseMatrix<LO, SC> Aif(BlkSize * numInteriorNodes, BlkSize * numFaceNodes);
1395 Teuchos::SerialDenseMatrix<LO, SC> Aie(BlkSize * numInteriorNodes, BlkSize * numEdgeNodes);
1396 Teuchos::SerialDenseMatrix<LO, SC> Afe(BlkSize * numFaceNodes, BlkSize * numEdgeNodes);
1397 // Coarse nodes "right hand sides" and local prolongators
1398 Teuchos::SerialDenseMatrix<LO, SC> Aic(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1399 Teuchos::SerialDenseMatrix<LO, SC> Afc(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1400 Teuchos::SerialDenseMatrix<LO, SC> Aec(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1401 Pi.shape(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1402 Pf.shape(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1403 Pe.shape(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1404
1405 ArrayView<const LO> rowIndices;
1406 ArrayView<const SC> rowValues;
1407 LO idof, iInd, jInd;
1408 int iType = 0, jType = 0;
1409 int orientation = -1;
1410 int collapseFlags[3] = {};
1411 Array<SC> stencil((std::pow(3, numDimensions)) * BlkSize);
1412 // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1413 // we might want to fuse the first three loops and do integer arithmetic
1414 // to have more optimization during compilation...
1415 for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1416 for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1417 for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1418 collapseFlags[0] = 0;
1419 collapseFlags[1] = 0;
1420 collapseFlags[2] = 0;
1421 if ((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1422 collapseFlags[0] += 1;
1423 }
1424 if ((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1425 collapseFlags[0] += 2;
1426 }
1427 if ((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1428 collapseFlags[1] += 1;
1429 }
1430 if ((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1431 collapseFlags[1] += 2;
1432 }
1433 if ((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1434 collapseFlags[2] += 1;
1435 }
1436 if ((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1437 collapseFlags[2] += 2;
1438 }
1439
1440 // Based on (ie, je, ke) we detect the type of node we are dealing with
1441 GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1442 for (LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1443 // Compute the dof index for each dof at point (ie, je, ke)
1444 idof = ((elemInds[2] * coarseRate[2] + ke) * lFineNodesPerDir[1] * lFineNodesPerDir[0] + (elemInds[1] * coarseRate[1] + je) * lFineNodesPerDir[0] + elemInds[0] * coarseRate[0] + ie) * BlkSize + dof0;
1445 Aghost->getLocalRowView(idof, rowIndices, rowValues);
1446 FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1447 elementNodesPerDir, collapseFlags, stencilType, stencil);
1448 LO io, jo, ko;
1449 if (iType == 3) { // interior node, no stencil collapse needed
1450 for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1451 // Find (if, jf, kf) the indices associated with the interacting node
1452 ko = ke + (interactingNode / 9 - 1);
1453 {
1454 LO tmp = interactingNode % 9;
1455 jo = je + (tmp / 3 - 1);
1456 io = ie + (tmp % 3 - 1);
1457 int dummy;
1458 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1459 }
1460 if ((io > -1 && io < elementNodesPerDir[0]) &&
1461 (jo > -1 && jo < elementNodesPerDir[1]) &&
1462 (ko > -1 && ko < elementNodesPerDir[2])) {
1463 for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1464 if (jType == 3) {
1465 Aii(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1466 stencil[interactingNode * BlkSize + dof1];
1467 } else if (jType == 2) {
1468 Aif(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1469 stencil[interactingNode * BlkSize + dof1];
1470 } else if (jType == 1) {
1471 Aie(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1472 stencil[interactingNode * BlkSize + dof1];
1473 } else if (jType == 0) {
1474 Aic(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1475 -stencil[interactingNode * BlkSize + dof1];
1476 }
1477 }
1478 }
1479 }
1480 } else if (iType == 2) { // Face node, collapse stencil along face normal (*orientation)
1481 CollapseStencil(2, orientation, collapseFlags, stencil);
1482 for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1483 // Find (if, jf, kf) the indices associated with the interacting node
1484 ko = ke + (interactingNode / 9 - 1);
1485 {
1486 LO tmp = interactingNode % 9;
1487 jo = je + (tmp / 3 - 1);
1488 io = ie + (tmp % 3 - 1);
1489 int dummy;
1490 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1491 }
1492 if ((io > -1 && io < elementNodesPerDir[0]) &&
1493 (jo > -1 && jo < elementNodesPerDir[1]) &&
1494 (ko > -1 && ko < elementNodesPerDir[2])) {
1495 for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1496 if (jType == 2) {
1497 Aff(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1498 stencil[interactingNode * BlkSize + dof1];
1499 } else if (jType == 1) {
1500 Afe(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1501 stencil[interactingNode * BlkSize + dof1];
1502 } else if (jType == 0) {
1503 Afc(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1504 -stencil[interactingNode * BlkSize + dof1];
1505 }
1506 }
1507 }
1508 }
1509 } else if (iType == 1) { // Edge node, collapse stencil perpendicular to edge direction
1510 CollapseStencil(1, orientation, collapseFlags, stencil);
1511 for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1512 // Find (if, jf, kf) the indices associated with the interacting node
1513 ko = ke + (interactingNode / 9 - 1);
1514 {
1515 LO tmp = interactingNode % 9;
1516 jo = je + (tmp / 3 - 1);
1517 io = ie + (tmp % 3 - 1);
1518 int dummy;
1519 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1520 }
1521 if ((io > -1 && io < elementNodesPerDir[0]) &&
1522 (jo > -1 && jo < elementNodesPerDir[1]) &&
1523 (ko > -1 && ko < elementNodesPerDir[2])) {
1524 for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1525 if (jType == 1) {
1526 Aee(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1527 stencil[interactingNode * BlkSize + dof1];
1528 } else if (jType == 0) {
1529 Aec(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1530 -stencil[interactingNode * BlkSize + dof1];
1531 }
1532 } // Check if interacting node is in element or not
1533 } // Pc is the identity so no need to treat iType == 0
1534 } // Loop over interacting nodes within a row
1535 } // Check for current node type
1536 } // Loop over degrees of freedom associated to a given node
1537 } // Loop over i
1538 } // Loop over j
1539 } // Loop over k
1540
1541 // Compute the projection operators for edge and interior nodes
1542 //
1543 // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1544 // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1545 // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1546 // [P_c] = I_c
1547 //
1548 { // Compute P_e
1549 // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1550 // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1551 Teuchos::SerialDenseSolver<LO, SC> problem;
1552 problem.setMatrix(Teuchos::rcp(&Aee, false));
1553 problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1554 problem.factorWithEquilibration(true);
1555 problem.solve();
1556 problem.unequilibrateLHS();
1557 }
1558
1559 { // Compute P_f
1560 // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1561 // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1562 Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1563 // Step two: P_f = A_{ff}^{-1}*A_{fc}
1564 Teuchos::SerialDenseSolver<LO, SC> problem;
1565 problem.setMatrix(Teuchos::rcp(&Aff, false));
1566 problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1567 problem.factorWithEquilibration(true);
1568 problem.solve();
1569 problem.unequilibrateLHS();
1570 }
1571
1572 { // Compute P_i
1573 // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1574 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1575 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1576 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1577 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1578 // Step two: P_i = A_{ii}^{-1}*A_{ic}
1579 Teuchos::SerialDenseSolver<LO, SC> problem;
1580 problem.setMatrix(Teuchos::rcp(&Aii, false));
1581 problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1582 problem.factorWithEquilibration(true);
1583 problem.solve();
1584 problem.unequilibrateLHS();
1585 }
1586
1587} // ComputeLocalEntries()
1588
1589template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1591 CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1592 Array<SC>& stencil) const {
1593 if (type == 2) { // Face stencil collapse
1594 // Let's hope things vectorize well inside this if statement
1595 if (orientation == 0) {
1596 for (LO i = 0; i < 9; ++i) {
1597 stencil[3 * i + 1] = stencil[3 * i] + stencil[3 * i + 1] + stencil[3 * i + 2];
1598 stencil[3 * i] = 0;
1599 stencil[3 * i + 2] = 0;
1600 }
1601 } else if (orientation == 1) {
1602 for (LO i = 0; i < 3; ++i) {
1603 stencil[9 * i + 3] = stencil[9 * i + 0] + stencil[9 * i + 3] + stencil[9 * i + 6];
1604 stencil[9 * i + 0] = 0;
1605 stencil[9 * i + 6] = 0;
1606 stencil[9 * i + 4] = stencil[9 * i + 1] + stencil[9 * i + 4] + stencil[9 * i + 7];
1607 stencil[9 * i + 1] = 0;
1608 stencil[9 * i + 7] = 0;
1609 stencil[9 * i + 5] = stencil[9 * i + 2] + stencil[9 * i + 5] + stencil[9 * i + 8];
1610 stencil[9 * i + 2] = 0;
1611 stencil[9 * i + 8] = 0;
1612 }
1613 } else if (orientation == 2) {
1614 for (LO i = 0; i < 9; ++i) {
1615 stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1616 stencil[i] = 0;
1617 stencil[i + 18] = 0;
1618 }
1619 }
1620 } else if (type == 1) { // Edge stencil collapse
1621 SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1622 if (orientation == 0) {
1623 for (LO i = 0; i < 9; ++i) {
1624 tmp1 += stencil[0 + 3 * i];
1625 stencil[0 + 3 * i] = 0;
1626 tmp2 += stencil[1 + 3 * i];
1627 stencil[1 + 3 * i] = 0;
1628 tmp3 += stencil[2 + 3 * i];
1629 stencil[2 + 3 * i] = 0;
1630 }
1631 stencil[12] = tmp1;
1632 stencil[13] = tmp2;
1633 stencil[14] = tmp3;
1634 } else if (orientation == 1) {
1635 for (LO i = 0; i < 3; ++i) {
1636 tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1637 stencil[0 + i] = 0;
1638 stencil[9 + i] = 0;
1639 stencil[18 + i] = 0;
1640 tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1641 stencil[3 + i] = 0;
1642 stencil[12 + i] = 0;
1643 stencil[21 + i] = 0;
1644 tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1645 stencil[6 + i] = 0;
1646 stencil[15 + i] = 0;
1647 stencil[24 + i] = 0;
1648 }
1649 stencil[10] = tmp1;
1650 stencil[13] = tmp2;
1651 stencil[16] = tmp3;
1652 } else if (orientation == 2) {
1653 for (LO i = 0; i < 9; ++i) {
1654 tmp1 += stencil[i];
1655 stencil[i] = 0;
1656 tmp2 += stencil[i + 9];
1657 stencil[i + 9] = 0;
1658 tmp3 += stencil[i + 18];
1659 stencil[i + 18] = 0;
1660 }
1661 stencil[4] = tmp1;
1662 stencil[13] = tmp2;
1663 stencil[22] = tmp3;
1664 }
1665 }
1666} // CollapseStencil
1667
1668template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1670 FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1671 const LO /* ke */, const ArrayView<const SC> rowValues, const Array<LO> /* elementNodesPerDir */,
1672 const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1673 const {
1674 if (stencilType == "reduced") {
1675 Array<int> fullStencilInds(7);
1676 fullStencilInds[0] = 4;
1677 fullStencilInds[1] = 10;
1678 fullStencilInds[2] = 12;
1679 fullStencilInds[3] = 13;
1680 fullStencilInds[4] = 14;
1681 fullStencilInds[5] = 16;
1682 fullStencilInds[6] = 22;
1683
1684 // Create a mask array to automate mesh boundary processing
1685 Array<int> mask(7);
1686 int stencilSize = rowValues.size();
1687 if (collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1688 if (stencilSize == 1) {
1689 // The assumption is made that if only one non-zero exists in the row
1690 // then this represent a Dirichlet BC being enforced.
1691 // One might want to zero out the corresponding row in the prolongator.
1692 mask[0] = 1;
1693 mask[1] = 1;
1694 mask[2] = 1;
1695 mask[4] = 1;
1696 mask[5] = 1;
1697 mask[6] = 1;
1698 } else {
1699 // Here we are looking at Neumann like BC where only a flux is prescribed
1700 // and less non-zeros will be present in the corresponding row.
1701 if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1702 mask[0] = 1;
1703 }
1704 if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1705 mask[6] = 1;
1706 }
1707 if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1708 mask[1] = 1;
1709 }
1710 if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1711 mask[5] = 1;
1712 }
1713 if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1714 mask[2] = 1;
1715 }
1716 if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1717 mask[4] = 1;
1718 }
1719 }
1720 }
1721
1722 int offset = 0;
1723 for (int ind = 0; ind < 7; ++ind) {
1724 if (mask[ind] == 1) {
1725 for (int dof = 0; dof < BlkSize; ++dof) {
1726 stencil[BlkSize * fullStencilInds[ind] + dof] = 0.0;
1727 }
1728 ++offset; // The offset is used to stay within rowValues bounds
1729 } else {
1730 for (int dof = 0; dof < BlkSize; ++dof) {
1731 stencil[BlkSize * fullStencilInds[ind] + dof] = rowValues[BlkSize * (ind - offset) + dof];
1732 }
1733 }
1734 }
1735 } else if (stencilType == "full") {
1736 // Create a mask array to automate mesh boundary processing
1737 Array<int> mask(27);
1738 if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1739 for (int ind = 0; ind < 9; ++ind) {
1740 mask[ind] = 1;
1741 }
1742 }
1743 if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1744 for (int ind = 0; ind < 9; ++ind) {
1745 mask[18 + ind] = 1;
1746 }
1747 }
1748 if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1749 for (int ind1 = 0; ind1 < 3; ++ind1) {
1750 for (int ind2 = 0; ind2 < 3; ++ind2) {
1751 mask[ind1 * 9 + ind2] = 1;
1752 }
1753 }
1754 }
1755 if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1756 for (int ind1 = 0; ind1 < 3; ++ind1) {
1757 for (int ind2 = 0; ind2 < 3; ++ind2) {
1758 mask[ind1 * 9 + ind2 + 6] = 1;
1759 }
1760 }
1761 }
1762 if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1763 for (int ind = 0; ind < 9; ++ind) {
1764 mask[3 * ind] = 1;
1765 }
1766 }
1767 if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1768 for (int ind = 0; ind < 9; ++ind) {
1769 mask[3 * ind + 2] = 1;
1770 }
1771 }
1772
1773 // Now the stencil is extracted and formated
1774 int offset = 0;
1775 for (LO ind = 0; ind < 27; ++ind) {
1776 if (mask[ind] == 0) {
1777 for (int dof = 0; dof < BlkSize; ++dof) {
1778 stencil[BlkSize * ind + dof] = 0.0;
1779 }
1780 ++offset; // The offset is used to stay within rowValues bounds
1781 } else {
1782 for (int dof = 0; dof < BlkSize; ++dof) {
1783 stencil[BlkSize * ind + dof] = rowValues[BlkSize * (ind - offset) + dof];
1784 }
1785 }
1786 }
1787 } // stencilTpye
1788} // FormatStencil()
1789
1790template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1792 const LO ie, const LO je, const LO ke,
1793 const Array<LO> elementNodesPerDir,
1794 int* type, LO& ind, int* orientation) const {
1795 // Initialize flags with -1 to be able to catch issues easily
1796 *type = -1, ind = 0, *orientation = -1;
1797 if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1798 // Corner node
1799 *type = 0;
1800 ind = 4 * ke / (elementNodesPerDir[2] - 1) + 2 * je / (elementNodesPerDir[1] - 1) + ie / (elementNodesPerDir[0] - 1);
1801 } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1802 // Edge node
1803 *type = 1;
1804 if (ke > 0) {
1805 ind += 2 * (elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);
1806 }
1807 if (ke == elementNodesPerDir[2] - 1) {
1808 ind += 4 * (elementNodesPerDir[2] - 2);
1809 }
1810 if ((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1811 if (je == 0) { // jlo ie edge
1812 *orientation = 0;
1813 ind += ie - 1;
1814 } else if (je == elementNodesPerDir[1] - 1) { // jhi ie edge
1815 *orientation = 0;
1816 ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1817 } else { // ilo or ihi je edge
1818 *orientation = 1;
1819 ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1820 }
1821 } else { // ke edges
1822 *orientation = 2;
1823 ind += 4 * (ke - 1) + 2 * (je / (elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1824 }
1825 } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1826 // Face node
1827 *type = 2;
1828 if (ke == 0) { // current node is on "bottom" face
1829 *orientation = 2;
1830 ind = (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1831 } else {
1832 // add nodes from "bottom" face
1833 ind += (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2);
1834 // add nodes from side faces
1835 ind += 2 * (ke - 1) * (elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1836 if (ke == elementNodesPerDir[2] - 1) { // current node is on "top" face
1837 *orientation = 2;
1838 ind += (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1839 } else { // current node is on a side face
1840 if (je == 0) {
1841 *orientation = 1;
1842 ind += ie - 1;
1843 } else if (je == elementNodesPerDir[1] - 1) {
1844 *orientation = 1;
1845 ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1846 } else {
1847 *orientation = 0;
1848 ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1849 }
1850 }
1851 }
1852 } else {
1853 // Interior node
1854 *type = 3;
1855 ind = (ke - 1) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2) + (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1856 }
1857} // GetNodeInfo()
1858
1859template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1861 const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1862 const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1863 const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1864 const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const {
1865 typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1866 DT n = last1 - first1;
1867 DT m = n / 2;
1868 DT z = Teuchos::OrdinalTraits<DT>::zero();
1869 while (m > z) {
1870 DT max = n - m;
1871 for (DT j = 0; j < max; j++) {
1872 for (DT k = j; k >= 0; k -= m) {
1873 if (first1[first2[k + m]] >= first1[first2[k]])
1874 break;
1875 std::swap(first2[k + m], first2[k]);
1876 }
1877 }
1878 m = m / 2;
1879 }
1880}
1881
1882} // namespace MueLu
1883
1884#define MUELU_BLACKBOXPFACTORY_SHORT
1885#endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput().
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
Namespace for MueLu classes and methods.
@ Runtime0
One-liner description of what is happening.