MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
11#define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
12
13#include <stdlib.h>
14
15#include <Kokkos_Core.hpp>
16
17#include <KokkosBatched_LU_Decl.hpp>
18#include <KokkosBatched_LU_Serial_Impl.hpp>
19#include <KokkosBatched_Trsm_Decl.hpp>
20#include <KokkosBatched_Trsm_Serial_Impl.hpp>
21#include <KokkosBatched_Util.hpp>
22#include <KokkosSparse_CrsMatrix.hpp>
23
24#include <Xpetra_CrsMatrixWrap.hpp>
25#include <Xpetra_ImportFactory.hpp>
26#include <Xpetra_Matrix.hpp>
27#include <Xpetra_MultiVectorFactory.hpp>
28#include <Xpetra_VectorFactory.hpp>
29
31#include "MueLu_MasterList.hpp"
32#include "MueLu_Monitor.hpp"
34
35namespace MueLu {
36
37template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
38
39RCP<const ParameterList>
42 RCP<ParameterList> validParamList = rcp(new ParameterList());
43
44 std::string name = "semicoarsen: coarsen rate";
45 validParamList->setEntry(name, MasterList::getEntry(name));
46 validParamList->set<RCP<const FactoryBase>>(
47 "A", Teuchos::null, "Generating factory of the matrix A");
48 validParamList->set<RCP<const FactoryBase>>(
49 "Nullspace", Teuchos::null, "Generating factory of the nullspace");
50 validParamList->set<RCP<const FactoryBase>>(
51 "Coordinates", Teuchos::null, "Generating factory for coordinates");
52
53 validParamList->set<RCP<const FactoryBase>>(
54 "LineDetection_VertLineIds", Teuchos::null,
55 "Generating factory for LineDetection vertical line ids");
56 validParamList->set<RCP<const FactoryBase>>(
57 "LineDetection_Layers", Teuchos::null,
58 "Generating factory for LineDetection layer ids");
59 validParamList->set<RCP<const FactoryBase>>(
60 "CoarseNumZLayers", Teuchos::null,
61 "Generating factory for number of coarse z-layers");
62
63 return validParamList;
64}
65
66template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
67 class Node>
71 DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
72 Input(fineLevel, "A");
73 Input(fineLevel, "Nullspace");
74
75 Input(fineLevel, "LineDetection_VertLineIds");
76 Input(fineLevel, "LineDetection_Layers");
77 Input(fineLevel, "CoarseNumZLayers");
78
79 // check whether fine level coordinate information is available.
80 // If yes, request the fine level coordinates and generate coarse coordinates
81 // during the Build call
82 if (fineLevel.GetLevelID() == 0) {
83 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
84 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
86 }
87 } else if (bTransferCoordinates_ == true) {
88 // on coarser levels we check the default factory providing "Coordinates"
89 // or the factory declared to provide "Coordinates"
90 // first, check which factory is providing coordinate information
91 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
92 if (myCoordsFact == Teuchos::null) {
93 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
94 }
95 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
96 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
97 }
98 }
99}
100
101template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
102 class Node>
104 Build(Level &fineLevel,
105 Level &coarseLevel)
106 const {
107 return BuildP(fineLevel, coarseLevel);
108}
109
110template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
111 class Node>
113 BuildP(Level &fineLevel,
114 Level &coarseLevel)
115 const {
116 FactoryMonitor m(*this, "Build", coarseLevel);
117
118 // obtain general variables
119 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
120 RCP<MultiVector> fineNullspace =
121 Get<RCP<MultiVector>>(fineLevel, "Nullspace");
122
123 // get user-provided coarsening rate parameter (constant over all levels)
124 const ParameterList &pL = GetParameterList();
125 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
126 TEUCHOS_TEST_FOR_EXCEPTION(
127 CoarsenRate < 2, Exceptions::RuntimeError,
128 "semicoarsen: coarsen rate must be greater than 1");
129
130 // collect general input data
131 LO BlkSize = A->GetFixedBlockSize();
132 RCP<const Map> rowMap = A->getRowMap();
133 LO Ndofs = rowMap->getLocalNumElements();
134 LO Nnodes = Ndofs / BlkSize;
135
136 // collect line detection information generated by the LineDetectionFactory
137 // instance
138 LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
139 Teuchos::ArrayRCP<LO> TVertLineId =
140 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
141 Teuchos::ArrayRCP<LO> TLayerId =
142 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
143
144 // compute number of coarse layers
145 TEUCHOS_TEST_FOR_EXCEPTION(FineNumZLayers < 2, Exceptions::RuntimeError,
146 "Cannot coarsen further");
147 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
148 LO CoarseNumZLayers =
149 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
150 if (CoarseNumZLayers < 1)
151 CoarseNumZLayers = 1;
152
153 // generate transfer operator with semicoarsening
154 RCP<Matrix> P;
155 RCP<MultiVector> coarseNullspace;
156 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
157 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
158 coarseNullspace);
159
160 // Store number of coarse z-layers on the coarse level container
161 // This information is used by the LineDetectionAlgorithm
162 // TODO get rid of the NoFactory
163 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
165
166 // store semicoarsening transfer on coarse level
167 Set(coarseLevel, "P", P);
168 Set(coarseLevel, "Nullspace", coarseNullspace);
169
170 // transfer coordinates
172 SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
173 typedef Xpetra::MultiVector<
174 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
175 xdMV;
176 RCP<xdMV> fineCoords = Teuchos::null;
177 if (fineLevel.GetLevelID() == 0 &&
178 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
179 fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
180 } else {
181 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
182 if (myCoordsFact == Teuchos::null) {
183 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
184 }
185 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
186 fineCoords =
187 fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
188 }
189 }
190
191 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
193 "No Coordinates found provided by the user.");
194
195 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
197 "Three coordinates arrays must be supplied if "
198 "line detection orientation not given.");
199 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
200 fineCoords->getDataNonConst(0);
201 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
202 fineCoords->getDataNonConst(1);
203 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
204 fineCoords->getDataNonConst(2);
205
206 // determine the maximum and minimum z coordinate value on the current
207 // processor.
208 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
209 -Teuchos::ScalarTraits<
210 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
211 Teuchos::ScalarTraits<
212 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
213 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
214 Teuchos::ScalarTraits<
215 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
216 Teuchos::ScalarTraits<
217 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
218 for (auto it = z.begin(); it != z.end(); ++it) {
219 if (*it > zval_max)
220 zval_max = *it;
221 if (*it < zval_min)
222 zval_min = *it;
223 }
224
225 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
226
227 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
228 myZLayerCoords = Teuchos::arcp<
229 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
230 myCoarseZLayers);
231 if (myCoarseZLayers == 1) {
232 myZLayerCoords[0] = zval_min;
233 } else {
234 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
235 (zval_max - zval_min) / (myCoarseZLayers - 1);
236 for (LO k = 0; k < myCoarseZLayers; ++k) {
237 myZLayerCoords[k] = k * dz;
238 }
239 }
240
241 // Note, that the coarse level node coordinates have to be in vertical
242 // ordering according to the numbering of the vertical lines
243
244 // number of vertical lines on current node:
245 LO numVertLines = Nnodes / FineNumZLayers;
246 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
247
248 RCP<const Map> coarseCoordMap = MapFactory::Build(
249 fineCoords->getMap()->lib(),
250 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
251 Teuchos::as<size_t>(numLocalCoarseNodes),
252 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
253 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
255 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
256 coarseCoords->putScalar(-1.0);
257 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
258 coarseCoords->getDataNonConst(0);
259 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
260 coarseCoords->getDataNonConst(1);
261 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
262 coarseCoords->getDataNonConst(2);
263
264 // loop over all vert line indices (stop as soon as possible)
265 LO cntCoarseNodes = 0;
266 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
267 // vertical line id in *vt
268 LO curVertLineId = TVertLineId[vt];
269
270 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
271 cy[curVertLineId * myCoarseZLayers] == -1.0) {
272 // loop over all local myCoarseZLayers
273 for (LO n = 0; n < myCoarseZLayers; ++n) {
274 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
275 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
276 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
277 }
278 cntCoarseNodes += myCoarseZLayers;
279 }
280
281 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
283 "number of coarse nodes is inconsistent.");
284 if (cntCoarseNodes == numLocalCoarseNodes)
285 break;
286 }
287
288 // set coarse level coordinates
289 Set(coarseLevel, "Coordinates", coarseCoords);
290 } /* end bool bTransferCoordinates */
291}
292
293template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
294 class Node>
298 BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
299 const LO DofsPerNode, const LO NFLayers,
300 const LO NCLayers, const ArrayRCP<LO> LayerId,
301 const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
302 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
303 RCP<MultiVector> &coarseNullspace) const {
304 SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
305 using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
306 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
307 using LOView1D = Kokkos::View<LO *, DeviceType>;
308 using LOView2D = Kokkos::View<LO **, DeviceType>;
309
310 // Construct a map from fine level column to layer ids (including ghost nodes)
311 // Note: this is needed to sum all couplings within a layer
312 const auto FCol2LayerVector =
313 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
314 const auto localTemp =
315 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
316 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
317 if (importer == Teuchos::null)
318 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
319 {
320 // Fill local temp with layer ids and fill ghost nodes
321 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
322 for (int row = 0; row < NFRows; row++)
323 localTempHost(row, 0) = LayerId[row / DofsPerNode];
324 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
325 Kokkos::deep_copy(localTempView, localTempHost);
326 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
327 }
328 const auto FCol2LayerView = FCol2LayerVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
329 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
330
331 // Construct a map from fine level column to local dof per node id (including
332 // ghost nodes) Note: this is needed to sum all couplings within a layer
333 const auto FCol2DofVector =
334 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
335 {
336 // Fill local temp with local dof per node ids and fill ghost nodes
337 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
338 for (int row = 0; row < NFRows; row++)
339 localTempHost(row, 0) = row % DofsPerNode;
340 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
341 Kokkos::deep_copy(localTempView, localTempHost);
342 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
343 }
344 const auto FCol2DofView = FCol2DofVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
345 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
346
347 // Compute NVertLines
348 // TODO: Read this from line detection factory
349 int NVertLines = -1;
350 if (NFNodes != 0)
351 NVertLines = VertLineId[0];
352 for (int node = 1; node < NFNodes; ++node)
353 if (VertLineId[node] > NVertLines)
354 NVertLines = VertLineId[node];
355 NVertLines++;
356
357 // Construct a map from Line, Layer ids to fine level node
358 LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
359 typename LOView2D::HostMirror LineLayer2NodeHost =
360 Kokkos::create_mirror_view(LineLayer2Node);
361 for (int node = 0; node < NFNodes; ++node)
362 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
363 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
364
365 // Construct a map from coarse layer id to fine layer id
366 LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
367 typename LOView1D::HostMirror CLayer2FLayerHost =
368 Kokkos::create_mirror_view(CLayer2FLayer);
369 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
370 const LO FirstStride =
371 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
372 const coordT RestStride =
373 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
374 const LO NCpts =
375 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
376 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError,
377 "sizes do not match.");
378 coordT stride = (coordT)FirstStride;
379 for (int clayer = 0; clayer < NCLayers; ++clayer) {
380 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
381 stride += RestStride;
382 }
383 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
384
385 // Compute start layer and stencil sizes for layer interpolation at each
386 // coarse layer
387 int MaxStencilSize = 1;
388 LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
389 LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
390 typename LOView1D::HostMirror CLayer2StartLayerHost =
391 Kokkos::create_mirror_view(CLayer2StartLayer);
392 typename LOView1D::HostMirror CLayer2StencilSizeHost =
393 Kokkos::create_mirror_view(CLayer2StencilSize);
394 for (int clayer = 0; clayer < NCLayers; ++clayer) {
395 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
396 const int stencilSize = (clayer < NCLayers - 1)
397 ? CLayer2FLayerHost(clayer + 1) - startLayer
398 : NFLayers - startLayer;
399
400 if (MaxStencilSize < stencilSize)
401 MaxStencilSize = stencilSize;
402 CLayer2StartLayerHost(clayer) = startLayer;
403 CLayer2StencilSizeHost(clayer) = stencilSize;
404 }
405 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
406 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
407
408 // Allocate storage for the coarse layer interpolation matrices on all
409 // vertical lines Note: Contributions to each matrix are collapsed to vertical
410 // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
411 // Here we store the full matrix to be compatible with kokkos kernels batch LU
412 // and tringular solve.
413 int Nmax = MaxStencilSize * DofsPerNode;
414 Kokkos::View<impl_SC ***, DeviceType> BandMat(
415 "BandMat", NVertLines, Nmax, Nmax);
416 Kokkos::View<impl_SC ***, DeviceType> BandSol(
417 "BandSol", NVertLines, Nmax, DofsPerNode);
418
419 // Precompute number of nonzeros in prolongation matrix and allocate P views
420 // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
421 // interpolation stencil (StencilSize*DofsPerNode)
422 int NnzP = 0;
423 for (int clayer = 0; clayer < NCLayers; ++clayer)
424 NnzP += CLayer2StencilSizeHost(clayer);
425 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
426 Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
427 Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
428
429 // Precompute Pptr
430 // Note: Each coarse layer stencil dof contributes DofsPerNode to the
431 // corresponding row in P
432 Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
433 typename Kokkos::View<size_t *, DeviceType>::HostMirror PptrHost =
434 Kokkos::create_mirror_view(Pptr);
435 Kokkos::deep_copy(PptrHost, 0);
436 for (int line = 0; line < NVertLines; ++line) {
437 for (int clayer = 0; clayer < NCLayers; ++clayer) {
438 const int stencilSize = CLayer2StencilSizeHost(clayer);
439 const int startLayer = CLayer2StartLayerHost(clayer);
440 for (int snode = 0; snode < stencilSize; ++snode) {
441 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
442 const int layer = startLayer + snode;
443 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
444 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
445 PptrHost(AmatRow + 1) += DofsPerNode;
446 }
447 }
448 }
449 }
450 for (int i = 2; i < NFRows + 1; ++i)
451 PptrHost(i) += PptrHost(i - 1);
452 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
454 "Number of nonzeros in P does not match");
455 Kokkos::deep_copy(Pptr, PptrHost);
456
457 // Precompute Pptr offsets
458 // Note: These are used to determine the nonzero index in Pvals and Pcols
459 Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
460 "layerBuckets", NFLayers);
461 Kokkos::deep_copy(layerBuckets, 0);
462 LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
463 MaxStencilSize);
464 typename LOView2D::HostMirror CLayerSNode2PptrOffsetHost =
465 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
466 for (int clayer = 0; clayer < NCLayers; ++clayer) {
467 const int stencilSize = CLayer2StencilSizeHost(clayer);
468 const int startLayer = CLayer2StartLayerHost(clayer);
469 for (int snode = 0; snode < stencilSize; ++snode) {
470 const int layer = startLayer + snode;
471 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
472 layerBuckets(layer)++;
473 }
474 }
475 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
476
477 { // Fill P - fill and solve each block tridiagonal system and fill P views
478 SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
479
480 const auto localAmat = Amat->getLocalMatrixDevice();
481 const auto zero = impl_ATS::zero();
482 const auto one = impl_ATS::one();
483
484 using range_policy = Kokkos::RangePolicy<execution_space>;
485 Kokkos::parallel_for(
486 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
487 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
488 for (int clayer = 0; clayer < NCLayers; ++clayer) {
489 // Initialize BandSol
490 auto bandSol =
491 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
492 for (int row = 0; row < Nmax; ++row)
493 for (int dof = 0; dof < DofsPerNode; ++dof)
494 bandSol(row, dof) = zero;
495
496 // Initialize BandMat (set unused row diagonal to 1.0)
497 const int stencilSize = CLayer2StencilSize(clayer);
498 const int N = stencilSize * DofsPerNode;
499 auto bandMat =
500 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
501 for (int row = 0; row < Nmax; ++row)
502 for (int col = 0; col < Nmax; ++col)
503 bandMat(row, col) =
504 (row == col && row >= N) ? one : zero;
505
506 // Loop over layers in stencil and fill banded matrix and rhs
507 const int flayer = CLayer2FLayer(clayer);
508 const int startLayer = CLayer2StartLayer(clayer);
509 for (int snode = 0; snode < stencilSize; ++snode) {
510 const int layer = startLayer + snode;
511 if (layer == flayer) { // If layer in stencil is a coarse layer
512 for (int dof = 0; dof < DofsPerNode; ++dof) {
513 const int row = snode * DofsPerNode + dof;
514 bandMat(row, row) = one;
515 bandSol(row, dof) = one;
516 }
517 } else { // Not a coarse layer
518 const int AmatBlkRow = LineLayer2Node(line, layer);
519 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
520 // Get Amat row info
521 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
522 const auto localAmatRow = localAmat.rowConst(AmatRow);
523 const int AmatRowLeng = localAmatRow.length;
524
525 const int row = snode * DofsPerNode + dofi;
526 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
527 const int col = snode * DofsPerNode + dofj;
528
529 // Sum values along row which correspond to stencil
530 // layer/dof and fill bandMat
531 auto val = zero;
532 for (int i = 0; i < AmatRowLeng; ++i) {
533 const int colidx = localAmatRow.colidx(i);
534 if (FCol2Layer(colidx) == layer &&
535 FCol2Dof(colidx) == dofj)
536 val += localAmatRow.value(i);
537 }
538 bandMat(row, col) = val;
539
540 if (snode > 0) {
541 // Sum values along row which correspond to stencil
542 // layer/dof below and fill bandMat
543 val = zero;
544 for (int i = 0; i < AmatRowLeng; ++i) {
545 const int colidx = localAmatRow.colidx(i);
546 if (FCol2Layer(colidx) == layer - 1 &&
547 FCol2Dof(colidx) == dofj)
548 val += localAmatRow.value(i);
549 }
550 bandMat(row, col - DofsPerNode) = val;
551 }
552
553 if (snode < stencilSize - 1) {
554 // Sum values along row which correspond to stencil
555 // layer/dof above and fill bandMat
556 val = zero;
557 for (int i = 0; i < AmatRowLeng; ++i) {
558 const int colidx = localAmatRow.colidx(i);
559 if (FCol2Layer(colidx) == layer + 1 &&
560 FCol2Dof(colidx) == dofj)
561 val += localAmatRow.value(i);
562 }
563 bandMat(row, col + DofsPerNode) = val;
564 }
565 }
566 }
567 }
568 }
569
570 // Batch LU and triangular solves
571 namespace KB = KokkosBatched;
572 using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
573 lu_type::invoke(bandMat);
574 using trsv_l_type =
575 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
576 KB::Trans::NoTranspose, KB::Diag::Unit,
577 KB::Algo::Trsm::Unblocked>;
578 trsv_l_type::invoke(one, bandMat, bandSol);
579 using trsv_u_type = typename KB::SerialTrsm<
580 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
581 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
582 trsv_u_type::invoke(one, bandMat, bandSol);
583
584 // Fill prolongation views with solution
585 for (int snode = 0; snode < stencilSize; ++snode) {
586 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
587 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
588 const int layer = startLayer + snode;
589 const int AmatBlkRow = LineLayer2Node(line, layer);
590 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
591
592 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
593 const int pptr =
594 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
595
596 const int col =
597 line * NCLayers + clayer; // coarse node (block row) index
598 Pcols(pptr) = col * DofsPerNode + dofj;
599 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
600 }
601 }
602 }
603 }
604 });
605 } // Fill P
606
607 // Build P
608 RCP<const Map> rowMap = Amat->getRowMap();
609 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
610 Xpetra::global_size_t itemp = GNdofs / NFLayers;
611 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
612 RCP<const Map> coarseMap = StridedMapFactory::Build(
613 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
614 stridingInfo_, rowMap->getComm(), -1, 0);
615 P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
616 RCP<CrsMatrix> PCrs = toCrsMatrix(P);
617 PCrs->setAllValues(Pptr, Pcols, Pvals);
618 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
619
620 // set StridingInformation of P
621 if (Amat->IsView("stridedMaps") == true)
622 P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
623 else
624 P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
625
626 // Construct coarse nullspace and inject fine nullspace
627 coarseNullspace =
628 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
629 const int numVectors = fineNullspace->getNumVectors();
630 const auto fineNullspaceView = fineNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
631 const auto coarseNullspaceView = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
632 using range_policy = Kokkos::RangePolicy<execution_space>;
633 Kokkos::parallel_for(
634 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
635 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
636 for (int clayer = 0; clayer < NCLayers; ++clayer) {
637 const int layer = CLayer2FLayer(clayer);
638 const int AmatBlkRow =
639 LineLayer2Node(line, layer); // fine node (block row) index
640 const int col =
641 line * NCLayers + clayer; // coarse node (block row) index
642 for (int k = 0; k < numVectors; ++k) {
643 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
644 const int fRow = AmatBlkRow * DofsPerNode + dofi;
645 const int cRow = col * DofsPerNode + dofi;
646 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
647 }
648 }
649 }
650 });
651}
652
653} // namespace MueLu
654
655#define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
656#endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
void Input(Level &level, const std::string &varName) const
T Get(Level &level, const std::string &varName) const
void Set(Level &level, const std::string &varName, const T &data) const
const RCP< const FactoryBase > GetFactory(const std::string &varName) const
Default implementation of FactoryAcceptor::GetFactory().
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput().
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the "master" list corresponding to the specified name.
static const NoFactory * get()
virtual const Teuchos::ParameterList & GetParameterList() const
Prolongator factory performing semi-coarsening.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes, const LO DofsPerNode, const LO NFLayers, const LO NCLayers, const ArrayRCP< LO > LayerId, const ArrayRCP< LO > VertLineId, const RCP< Matrix > &Amat, const RCP< MultiVector > fineNullspace, RCP< Matrix > &P, RCP< MultiVector > &coarseNullspace) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.