MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase2bAlgorithm_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// MueLu: A package for multigrid based preconditioning
4//
5// Copyright 2012 NTESS and the MueLu contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
11#define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
12
13#include <Teuchos_Comm.hpp>
14#include <Teuchos_CommHelpers.hpp>
15
16#include <Xpetra_Vector.hpp>
17
19
20#include "MueLu_Aggregates.hpp"
21#include "MueLu_Exceptions.hpp"
22#include "MueLu_LWGraph.hpp"
23#include "MueLu_Monitor.hpp"
24
25namespace MueLu {
26
27// Try to stick unaggregated nodes into a neighboring aggregate if they are
28// not already too big
29template <class LocalOrdinal, class GlobalOrdinal, class Node>
31 Monitor m(*this, "BuildAggregatesNonKokkos");
32 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
33
34 const LO numRows = graph.GetNodeNumVertices();
35 const int myRank = graph.GetComm()->getRank();
36
37 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
38 ArrayRCP<LO> procWinner = aggregates.GetProcWinner()->getDataNonConst(0);
39
40 LO numLocalAggregates = aggregates.GetNumAggregates();
41
42 const LO defaultConnectWeight = 100;
43 const LO penaltyConnectWeight = 10;
44
45 std::vector<LO> aggWeight(numLocalAggregates, 0);
46 std::vector<LO> connectWeight(numRows, defaultConnectWeight);
47 std::vector<LO> aggPenalties(numRows, 0);
48
49 // We do this cycle twice.
50 // I don't know why, but ML does it too
51 // taw: by running the aggregation routine more than once there is a chance that also
52 // non-aggregated nodes with a node distance of two are added to existing aggregates.
53 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
54 // should be sufficient.
55 for (int k = 0; k < 2; k++) {
56 for (LO i = 0; i < numRows; i++) {
57 if (aggStat[i] != READY)
58 continue;
59
60 auto neighOfINode = graph.getNeighborVertices(i);
61
62 for (int j = 0; j < neighOfINode.length; j++) {
63 LO neigh = neighOfINode(j);
64
65 // We don't check (neigh != i), as it is covered by checking (aggStat[neigh] == AGGREGATED)
66 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED)
67 aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
68 }
69
70 int bestScore = -100000;
71 int bestAggId = -1;
72 int bestConnect = -1;
73
74 for (int j = 0; j < neighOfINode.length; j++) {
75 LO neigh = neighOfINode(j);
76 int aggId = vertex2AggId[neigh];
77
78 // Note: The third condition is only relevant if the ML matching is enabled
79 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == AGGREGATED && (!matchMLbehavior || aggWeight[aggId] != 0)) {
80 int score = aggWeight[aggId] - aggPenalties[aggId];
81
82 if (score > bestScore) {
83 bestAggId = aggId;
84 bestScore = score;
85 bestConnect = connectWeight[neigh];
86
87 } else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
88 bestConnect = connectWeight[neigh];
89 }
90
91 // Reset the weights for the next loop
92 aggWeight[aggId] = 0;
93 }
94 }
95
96 if (bestScore >= 0) {
97 aggStat[i] = AGGREGATED;
98 vertex2AggId[i] = bestAggId;
99 procWinner[i] = myRank;
100
101 numNonAggregatedNodes--;
102
103 aggPenalties[bestAggId]++;
104 connectWeight[i] = bestConnect - penaltyConnectWeight;
105 }
106 }
107 }
108}
109
110// Try to stick unaggregated nodes into a neighboring aggregate if they are
111// not already too big
112template <class LocalOrdinal, class GlobalOrdinal, class Node>
114 BuildAggregates(const ParameterList& params,
115 const LWGraph_kokkos& graph,
116 Aggregates& aggregates,
118 LO& numNonAggregatedNodes) const {
119 if (params.get<bool>("aggregation: deterministic")) {
120 Monitor m(*this, "BuildAggregatesDeterministic");
121 BuildAggregates<true>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
122 } else {
123 Monitor m(*this, "BuildAggregatesRandom");
124 BuildAggregates<false>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
125 }
126
127} // BuildAggregates
128
129template <class AggStatType, class ProcWinnerType, class Vertex2AggType, class ColorsType, class LocalGraphType, class AggPenaltyType, class LO, bool deterministic, bool matchMLbehavior>
131 private:
132 AggStatType aggStat;
133 ProcWinnerType procWinner;
134 Vertex2AggType vertex2AggId;
135 ColorsType colors;
136 LocalGraphType lclLWGraph;
137 AggPenaltyType aggPenalties;
138 AggPenaltyType aggPenaltyUpdates;
139 AggPenaltyType connectWeight;
143
144 public:
145 ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& aggPenaltyUpdates_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
146 : aggStat(aggStat_)
147 , procWinner(procWinner_)
148 , vertex2AggId(vertex2AggId_)
149 , colors(colors_)
150 , lclLWGraph(lclLWGraph_)
151 , aggPenalties(aggPenalties_)
152 , aggPenaltyUpdates(aggPenaltyUpdates_)
153 , connectWeight(connectWeight_)
154 , penaltyConnectWeight(penaltyConnectWeight_)
155 , color(color_)
156 , myRank(rank_) {}
157
158 ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
159 : aggStat(aggStat_)
160 , procWinner(procWinner_)
161 , vertex2AggId(vertex2AggId_)
162 , colors(colors_)
163 , lclLWGraph(lclLWGraph_)
164 , aggPenalties(aggPenalties_)
165 , connectWeight(connectWeight_)
166 , penaltyConnectWeight(penaltyConnectWeight_)
167 , color(color_)
168 , myRank(rank_) {}
169
170 KOKKOS_INLINE_FUNCTION
171 void operator()(const LO& i, LO& tmpNumAggregated) const {
172 if (aggStat(i) != READY || colors(i) != color)
173 return;
174
175 int bestScore = -100000;
176 int bestAggId = -1;
177 int bestConnect = -1;
178
179 auto neighOfINode = lclLWGraph.getNeighborVertices(i);
180
181 for (int j = 0; j < neighOfINode.length; j++) {
182 LO neigh = neighOfINode(j);
183
184 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
185 (aggStat(neigh) == AGGREGATED)) {
186 auto aggId = vertex2AggId(neigh, 0);
187 LO aggWeight = 0;
188 for (int k = 0; k < neighOfINode.length; k++) {
189 LO neigh2 = neighOfINode(k);
190 if (lclLWGraph.isLocalNeighborVertex(neigh2) &&
191 (aggStat(neigh2) == AGGREGATED) &&
192 (vertex2AggId(neigh2, 0) == aggId))
193 aggWeight += connectWeight(neigh2);
194 }
195
196 if (matchMLbehavior && (aggWeight == 0))
197 return;
198
199 int score = aggWeight - aggPenalties(aggId);
200
201 if (score > bestScore) {
202 bestAggId = aggId;
203 bestScore = score;
204 bestConnect = connectWeight(neigh);
205
206 } else if (aggId == bestAggId &&
207 connectWeight(neigh) > bestConnect) {
208 bestConnect = connectWeight(neigh);
209 }
210 }
211 }
212 if (bestScore >= 0) {
213 aggStat(i) = AGGREGATED;
214 vertex2AggId(i, 0) = bestAggId;
215 procWinner(i, 0) = myRank;
216
217 if constexpr (deterministic) {
218 Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
219 } else {
220 Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
221 }
222 connectWeight(i) = bestConnect - penaltyConnectWeight;
223 tmpNumAggregated++;
224 }
225 }
226};
227
228template <class LocalOrdinal, class GlobalOrdinal, class Node>
229template <bool deterministic>
231 BuildAggregates(const ParameterList& params,
232 const LWGraph_kokkos graph,
233 Aggregates& aggregates,
235 LO& numNonAggregatedNodes) const {
236 using device_type = typename LWGraph_kokkos::device_type;
237 using execution_space = typename LWGraph_kokkos::execution_space;
238
239 bool matchMLbehavior = params.get<bool>("aggregation: match ML phase2b");
240
241 const LO numRows = graph.GetNodeNumVertices();
242 const int myRank = graph.GetComm()->getRank();
243
244 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
245 auto procWinner = aggregates.GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
246 auto colors = aggregates.GetGraphColors();
247 const LO numColors = aggregates.GetGraphNumColors();
248 const LO numLocalAggregates = aggregates.GetNumAggregates();
249
250 const LO defaultConnectWeight = 100;
251 const LO penaltyConnectWeight = 10;
252
253 Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing("connectWeight"), numRows);
254 Kokkos::View<LO*, device_type> aggPenalties("aggPenalties", numLocalAggregates); // This gets initialized to zero here
255 Kokkos::View<LO*, device_type> aggPenaltyUpdates;
256 // if constexpr (deterministic)
257 aggPenaltyUpdates = Kokkos::View<LO*, device_type>("aggPenaltyUpdates", numLocalAggregates);
258
259 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
260
261 // taw: by running the aggregation routine more than once there is a chance that also
262 // non-aggregated nodes with a node distance of two are added to existing aggregates.
263 // Assuming that the aggregate size is 3 in each direction running the algorithm only twice
264 // should be sufficient.
265 // lbv: If the prior phase of aggregation where run without specifying an aggregate size,
266 // the distance 2 coloring and phase 1 aggregation actually guarantee that only one iteration
267 // is needed to reach distance 2 neighbors.
268 int maxIters = 2;
269 int maxNodesPerAggregate = params.get<int>("aggregation: max agg size");
270 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
271 maxIters = 1;
272 }
273 for (int iter = 0; iter < maxIters; ++iter) {
274 for (LO color = 1; color <= numColors; ++color) {
275 // the reduce counts how many nodes are aggregated by this phase,
276 // which will then be subtracted from numNonAggregatedNodes
277 LO numAggregated = 0;
278
279 if constexpr (deterministic) {
280 if (matchMLbehavior) {
281 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
282
283 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
284 Kokkos::RangePolicy<execution_space>(0, numRows),
285 functor,
286 numAggregated);
287 } else {
288 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
289
290 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
291 Kokkos::RangePolicy<execution_space>(0, numRows),
292 functor,
293 numAggregated);
294 }
295 } else {
296 if (matchMLbehavior) {
297 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
298
299 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
300 Kokkos::RangePolicy<execution_space>(0, numRows),
301 functor,
302 numAggregated);
303 } else {
304 auto functor = ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
305
306 Kokkos::parallel_reduce("Aggregation Phase 2b: aggregates expansion",
307 Kokkos::RangePolicy<execution_space>(0, numRows),
308 functor,
309 numAggregated);
310 }
311 }
312
313 if constexpr (deterministic) {
314 Kokkos::parallel_for(
315 "Aggregation Phase 2b: updating agg penalties",
316 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
317 KOKKOS_LAMBDA(const LO agg) {
318 aggPenalties(agg) += aggPenaltyUpdates(agg);
319 aggPenaltyUpdates(agg) = 0;
320 });
321 }
322
323 numNonAggregatedNodes -= numAggregated;
324 }
325 } // loop over maxIters
326
327} // BuildAggregates
328
329} // namespace MueLu
330
331#endif /* MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_ */
Container class for aggregation information.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION void operator()(const LO &i, LO &tmpNumAggregated) const
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &aggPenaltyUpdates_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< const Teuchos::Comm< int > > GetComm() const
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Lightweight MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.