MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase2aAlgorithm_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
11#define MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15
16#include <Xpetra_Vector.hpp>
17
19
20#include "MueLu_LWGraph.hpp"
21#include "MueLu_Aggregates.hpp"
22#include "MueLu_Exceptions.hpp"
23#include "MueLu_Monitor.hpp"
24
25#include "Kokkos_Sort.hpp"
26
27namespace MueLu {
28
29template <class LocalOrdinal, class GlobalOrdinal, class Node>
31 Monitor m(*this, "BuildAggregatesNonKokkos");
32
33 int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
34 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
35 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
36
37 const LO numRows = graph.GetNodeNumVertices();
38 const int myRank = graph.GetComm()->getRank();
39
40 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
41 ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
42
43 LO numLocalAggregates = aggregates.GetNumAggregates();
44
45 LO numLocalNodes = procWinner.size();
46 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
47
48 const double aggFactor = params.get<double>("aggregation: phase2a agg factor");
49 double factor;
50
51 if (matchMLbehavior) {
52 // Note: ML uses global counts to set the factor
53 // Passing # of nonaggregated nodes and # of nodes via aggStat
54 GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)aggStat.size()};
55 GO out_data[2];
56 Teuchos::reduceAll(*graph.GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data);
57 GO phase_one_aggregated = out_data[1] - out_data[0];
58 factor = as<double>(phase_one_aggregated) / (out_data[1] + 1);
59
60 LO agg_stat_unaggregated = 0;
61 LO agg_stat_aggregated = 0;
62 LO agg_stat_bdry = 0;
63 for (LO i = 0; i < (LO)aggStat.size(); i++) {
64 if (aggStat[i] == AGGREGATED)
65 agg_stat_aggregated++;
66 else if (aggStat[i] == BOUNDARY)
67 agg_stat_bdry++;
68 else
69 agg_stat_unaggregated++;
70 }
71
72 // NOTE: ML always uses 3 as minNodesPerAggregate
73 minNodesPerAggregate = 3;
74
75 } else {
76 // MueLu defaults to using local counts to set the factor
77 factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
78 }
79
80 // Now apply aggFactor
81 factor = pow(factor, aggFactor);
82
83 int aggIndex = -1;
84 size_t aggSize = 0;
85 std::vector<int> aggList(graph.getLocalMaxNumRowEntries());
86
87 for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
88 if (aggStat[rootCandidate] != READY) {
89 continue;
90 }
91
92 LO numNeighbors = 0;
93 aggSize = 0;
94 if (matchMLbehavior) {
95 aggList[aggSize++] = rootCandidate;
96 numNeighbors++;
97 }
98
99 auto neighOfINode = graph.getNeighborVertices(rootCandidate);
100
101 LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
102 for (int j = 0; j < neighOfINode.length; j++) {
103 LO neigh = neighOfINode(j);
104 if (graph.isLocalNeighborVertex(neigh))
105 num_local_neighbors++;
106
107 if (neigh != rootCandidate) {
108 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) {
109 // If aggregate size does not exceed max size, add node to the tentative aggregate
110 // NOTE: We do not exit the loop over all neighbours since we have still
111 // to count all aggregated neighbour nodes for the aggregation criteria
112 // NOTE: We check here for the maximum aggregation size. If we would do it below
113 // with all the other check too big aggregates would not be accepted at all.
114 if (aggSize < as<size_t>(maxNodesPerAggregate))
115 aggList[aggSize++] = neigh;
116 num_nonaggd_neighbors++;
117 }
118
119 numNeighbors++;
120 }
121 }
122
123 bool accept_aggregate;
124 if (matchMLbehavior) {
125 // ML does this calculation slightly differently than MueLu does by default, specifically it
126 // uses the *local* number of neigbors, regardless of what they are.
127 // NOTE: ML does zero compression here. Not sure if it matters
128 // NOTE: ML uses a hardcoded value 3 instead of minNodesPerAggregate. This has been set above
129 LO rowi_N = num_local_neighbors;
130 num_nonaggd_neighbors++; // ML counts the node itself as a nonaggd_neighbor
131 accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
132 } else {
133 accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
134 }
135
136 if (accept_aggregate) {
137 // Accept new aggregate
138 // rootCandidate becomes the root of the newly formed aggregate
139 aggregates.SetIsRoot(rootCandidate);
140 aggIndex = numLocalAggregates++;
141
142 for (size_t k = 0; k < aggSize; k++) {
143 aggStat[aggList[k]] = AGGREGATED;
144 vertex2AggId[aggList[k]] = aggIndex;
145 procWinner[aggList[k]] = myRank;
146 }
147
148 numNonAggregatedNodes -= aggSize;
149 }
150 }
151
152 // update aggregate object
153 aggregates.SetNumAggregates(numLocalAggregates);
154}
155
156template <class LocalOrdinal, class GlobalOrdinal, class Node>
158 BuildAggregates(const ParameterList& params,
159 const LWGraph_kokkos& graph,
160 Aggregates& aggregates,
162 LO& numNonAggregatedNodes) const {
163 if (params.get<bool>("aggregation: deterministic")) {
164 Monitor m(*this, "BuildAggregatesDeterministic");
165 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
166 } else {
167 Monitor m(*this, "BuildAggregatesRandom");
168 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
169 }
170
171} // BuildAggregates
172
173template <class LO, class GO, class Node>
175 BuildAggregatesRandom(const ParameterList& params,
176 const LWGraph_kokkos& graph,
177 Aggregates& aggregates,
179 LO& numNonAggregatedNodes) const {
180 using device_type = typename LWGraph_kokkos::device_type;
181 using execution_space = typename LWGraph_kokkos::execution_space;
182
183 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
184 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
185 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2a");
186
187 const LO numRows = graph.GetNodeNumVertices();
188 const int myRank = graph.GetComm()->getRank();
189
190 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
191 auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
192 auto colors = aggregates.GetGraphColors();
193 const LO numColors = aggregates.GetGraphNumColors();
194
195 auto lclLWGraph = graph;
196
197 LO numLocalNodes = numRows;
198 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
199
200 const double aggFactor = 0.5;
201 double factor = static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
202 factor = pow(factor, aggFactor);
203
204 // LBV on Sept 12, 2019: this looks a little heavy handed,
205 // I'm not sure a view is needed to perform atomic updates.
206 // If we can avoid this and use a simple LO that would be
207 // simpler for later maintenance.
208 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
209 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
210 Kokkos::create_mirror_view(numLocalAggregates);
211 h_numLocalAggregates() = aggregates.GetNumAggregates();
212 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
213
214 // Now we create new aggregates using root nodes in all colors other than the first color,
215 // as the first color was already exhausted in Phase 1.
216 for (int color = 2; color < numColors + 1; ++color) {
217 LO tmpNumNonAggregatedNodes = 0;
218 Kokkos::parallel_reduce(
219 "Aggregation Phase 2a: loop over each individual color",
220 Kokkos::RangePolicy<execution_space>(0, numRows),
221 KOKKOS_LAMBDA(const LO rootCandidate, LO& lNumNonAggregatedNodes) {
222 if (aggStat(rootCandidate) == READY &&
223 colors(rootCandidate) == color) {
224 LO numNeighbors = 0;
225 LO aggSize = 0;
226 if (matchMLbehavior) {
227 aggSize += 1;
228 numNeighbors += 1;
229 }
230
231 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
232
233 // Loop over neighbors to count how many nodes could join
234 // the new aggregate
235
236 for (int j = 0; j < neighbors.length; ++j) {
237 LO neigh = neighbors(j);
238 if (neigh != rootCandidate) {
239 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
240 (aggStat(neigh) == READY) &&
241 (aggSize < maxNodesPerAggregate)) {
242 ++aggSize;
243 }
244 ++numNeighbors;
245 }
246 }
247
248 // If a sufficient number of nodes can join the new aggregate
249 // then we actually create the aggregate.
250 if (aggSize > minNodesPerAggregate &&
251 (aggSize > factor * numNeighbors)) {
252 // aggregates.SetIsRoot(rootCandidate);
253 LO aggIndex = Kokkos::
254 atomic_fetch_add(&numLocalAggregates(), 1);
255
256 LO numAggregated = 0;
257
258 if (matchMLbehavior) {
259 // Add the root.
260 aggStat(rootCandidate) = AGGREGATED;
261 vertex2AggId(rootCandidate, 0) = aggIndex;
262 procWinner(rootCandidate, 0) = myRank;
263 ++numAggregated;
264 --lNumNonAggregatedNodes;
265 }
266
267 for (int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
268 LO neigh = neighbors(neighIdx);
269 if (neigh != rootCandidate) {
270 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
271 (aggStat(neigh) == READY) &&
272 (numAggregated < aggSize)) {
273 aggStat(neigh) = AGGREGATED;
274 vertex2AggId(neigh, 0) = aggIndex;
275 procWinner(neigh, 0) = myRank;
276
277 ++numAggregated;
278 --lNumNonAggregatedNodes;
279 }
280 }
281 }
282 }
283 }
284 },
285 tmpNumNonAggregatedNodes);
286 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
287 }
288
289 // update aggregate object
290 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
291 aggregates.SetNumAggregates(h_numLocalAggregates());
292} // BuildAggregatesRandom
293
294template <class LO, class GO, class Node>
296 BuildAggregatesDeterministic(const ParameterList& params,
297 const LWGraph_kokkos& graph,
298 Aggregates& aggregates,
300 LO& numNonAggregatedNodes) const {
301 using device_type = typename LWGraph_kokkos::device_type;
302 using execution_space = typename LWGraph_kokkos::execution_space;
303
304 const int minNodesPerAggregate = params.get<int>("aggregation: min agg size");
305 const int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
306
307 const LO numRows = graph.GetNodeNumVertices();
308 const int myRank = graph.GetComm()->getRank();
309
310 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
311 auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
312 auto colors = aggregates.GetGraphColors();
313 const LO numColors = aggregates.GetGraphNumColors();
314
315 auto lclLWGraph = graph;
316
317 LO numLocalNodes = procWinner.size();
318 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
319
320 const double aggFactor = 0.5;
321 double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
322 factor = pow(factor, aggFactor);
323
324 Kokkos::View<LO, device_type> numLocalAggregates("numLocalAggregates");
325 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
326 Kokkos::create_mirror_view(numLocalAggregates);
327 h_numLocalAggregates() = aggregates.GetNumAggregates();
328 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
329
330 // Now we create new aggregates using root nodes in all colors other than the first color,
331 // as the first color was already exhausted in Phase 1.
332 //
333 // In the deterministic version, exactly the same set of aggregates will be created
334 // (as the nondeterministic version)
335 // because no vertex V can be a neighbor of two vertices of the same color, so two root
336 // candidates can't fight over V
337 //
338 // But, the precise values in vertex2AggId need to match exactly, so just sort the new
339 // roots of each color before assigning aggregate IDs
340
341 // numNonAggregatedNodes is the best available upper bound for the number of aggregates
342 // which may be created in this phase, so use it for the size of newRoots
343 Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
344 Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
345 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
346 for (int color = 1; color < numColors + 1; ++color) {
347 h_numNewRoots() = 0;
348 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
349 Kokkos::parallel_for(
350 "Aggregation Phase 2a: determining new roots of current color",
351 Kokkos::RangePolicy<execution_space>(0, numRows),
352 KOKKOS_LAMBDA(const LO rootCandidate) {
353 if (aggStat(rootCandidate) == READY &&
354 colors(rootCandidate) == color) {
355 LO aggSize = 0;
356 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
357 // Loop over neighbors to count how many nodes could join
358 // the new aggregate
359 LO numNeighbors = 0;
360 for (int j = 0; j < neighbors.length; ++j) {
361 LO neigh = neighbors(j);
362 if (neigh != rootCandidate) {
363 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
364 aggStat(neigh) == READY &&
365 aggSize < maxNodesPerAggregate) {
366 ++aggSize;
367 }
368 ++numNeighbors;
369 }
370 }
371 // If a sufficient number of nodes can join the new aggregate
372 // then we mark rootCandidate as a future root.
373 if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
374 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
375 newRoots(newRootIndex) = rootCandidate;
376 }
377 }
378 });
379 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
380
381 if (h_numNewRoots() > 0) {
382 // sort the new root indices
383 Kokkos::sort(newRoots, 0, h_numNewRoots());
384 // now, loop over all new roots again and actually create the aggregates
385 LO tmpNumNonAggregatedNodes = 0;
386 // First, just find the set of color vertices which will become aggregate roots
387 Kokkos::parallel_reduce(
388 "Aggregation Phase 2a: create new aggregates",
389 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
390 KOKKOS_LAMBDA(const LO newRootIndex, LO& lNumNonAggregatedNodes) {
391 LO root = newRoots(newRootIndex);
392 LO newAggID = numLocalAggregates() + newRootIndex;
393 auto neighbors = lclLWGraph.getNeighborVertices(root);
394 // Loop over neighbors and add them to new aggregate
395 aggStat(root) = AGGREGATED;
396 vertex2AggId(root, 0) = newAggID;
397 LO aggSize = 1;
398 for (int j = 0; j < neighbors.length; ++j) {
399 LO neigh = neighbors(j);
400 if (neigh != root) {
401 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
402 aggStat(neigh) == READY &&
403 aggSize < maxNodesPerAggregate) {
404 aggStat(neigh) = AGGREGATED;
405 vertex2AggId(neigh, 0) = newAggID;
406 procWinner(neigh, 0) = myRank;
407 aggSize++;
408 }
409 }
410 }
411 lNumNonAggregatedNodes -= aggSize;
412 },
413 tmpNumNonAggregatedNodes);
414 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
415 h_numLocalAggregates() += h_numNewRoots();
416 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
417 }
418 }
419 aggregates.SetNumAggregates(h_numLocalAggregates());
420}
421
422} // namespace MueLu
423
424#endif /* MUELU_AGGREGATIONPHASE2AALGORITHM_DEF_HPP_ */
Container class for aggregation information.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
void SetIsRoot(LO i, bool value=true)
Set root node information.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregatesDeterministic(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesRandom(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< const Teuchos::Comm< int > > GetComm() const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Lightweight MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.