// Excerpt of the phase-2a aggregation routine labelled "BuildAggregatesNonKokkos".
// Only part of the body is visible here: several declarations (factor, aggList,
// aggSize, numNeighbors, ...) and branch conditions live on lines that are not shown.
31 Monitor m(*
this,
"BuildAggregatesNonKokkos");
// Aggregate size limits and the ML-compatibility switch come from the parameter list.
33 int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
34 int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
35 bool matchMLbehavior = params.get<
bool>(
"aggregation: match ML phase2a");
// Rank on the graph's communicator; written into procWinner for every node aggregated below.
38 const int myRank = graph.
GetComm()->getRank();
// Writable data (index 0) of the node -> aggregate-id vector and the node -> winning-rank vector.
40 ArrayRCP<LO> vertex2AggId = aggregates.
GetVertex2AggId()->getDataNonConst(0);
41 ArrayRCP<LO> procWinner = aggregates.
GetProcWinner()->getDataNonConst(0);
// Locally aggregated count = number of local entries minus the still-unaggregated count.
45 LO numLocalNodes = procWinner.size();
46 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// Exponent applied to the aggregated fraction further down.
48 const double aggFactor = params.get<
double>(
"aggregation: phase2a agg factor");
// ML-matching mode: the fraction is built from counts summed over all ranks.
51 if (matchMLbehavior) {
// in_data = {local unaggregated count, local aggStat size}; out_data receives the global sums.
54 GO in_data[2] = {(GO)numNonAggregatedNodes, (GO)aggStat.size()};
56 Teuchos::reduceAll(*graph.
GetComm(), Teuchos::REDUCE_SUM, 2, in_data, out_data);
// Global aggregated count = global size - global unaggregated count.
57 GO phase_one_aggregated = out_data[1] - out_data[0];
// NOTE(review): the "+ 1" in the denominator presumably guards against division by zero - confirm.
58 factor = as<double>(phase_one_aggregated) / (out_data[1] + 1);
// Tally over aggStat; the conditions that select which counter is bumped are not visible here.
60 LO agg_stat_unaggregated = 0;
61 LO agg_stat_aggregated = 0;
63 for (LO i = 0; i < (LO)aggStat.size(); i++) {
65 agg_stat_aggregated++;
69 agg_stat_unaggregated++;
// The configured minimum size is overridden with 3 (enclosing condition not visible).
73 minNodesPerAggregate = 3;
// Fraction from purely local counts (presumably the non-ML path; the else is not visible).
77 factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
// Raise the aggregated fraction to the configured power.
81 factor = pow(factor, aggFactor);
// Visit every row as a potential aggregate root.
87 for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
// Rows whose status is not READY take this branch (body not visible; presumably skipped).
88 if (aggStat[rootCandidate] !=
READY) {
// In ML-matching mode the root itself is appended to the tentative aggregate list.
94 if (matchMLbehavior) {
95 aggList[aggSize++] = rootCandidate;
// Scan the candidate's neighbours; some of the filtering conditions are on lines not shown.
101 LO num_nonaggd_neighbors = 0, num_local_neighbors = 0;
102 for (
int j = 0; j < neighOfINode.length; j++) {
103 LO neigh = neighOfINode(j);
105 num_local_neighbors++;
107 if (neigh != rootCandidate) {
// The neighbour is stored only while the list is below the max size;
// num_nonaggd_neighbors is incremented whether or not it was stored.
114 if (aggSize < as<size_t>(maxNodesPerAggregate))
115 aggList[aggSize++] = neigh;
116 num_nonaggd_neighbors++;
// Decide whether the tentative aggregate is kept.
123 bool accept_aggregate;
124 if (matchMLbehavior) {
// ML-matching test: uses the local-neighbour count and the non-aggregated neighbour count
// after bumping it by one (presumably to account for the root - confirm).
129 LO rowi_N = num_local_neighbors;
130 num_nonaggd_neighbors++;
131 accept_aggregate = (rowi_N > as<LO>(minNodesPerAggregate)) && (num_nonaggd_neighbors > (factor * rowi_N));
// Other test: tentative size must strictly exceed both the minimum size and factor * numNeighbors.
133 accept_aggregate = (aggSize > as<size_t>(minNodesPerAggregate)) && (aggSize > factor * numNeighbors);
136 if (accept_aggregate) {
// Take the next aggregate index and stamp it, plus this rank, on every listed node.
140 aggIndex = numLocalAggregates++;
142 for (
size_t k = 0; k < aggSize; k++) {
144 vertex2AggId[aggList[k]] = aggIndex;
145 procWinner[aggList[k]] = myRank;
// All listed nodes are now aggregated, so the caller's counter drops by aggSize.
148 numNonAggregatedNodes -= aggSize;
179 LO& numNonAggregatedNodes)
const {
// Excerpt of a Kokkos-based phase-2a routine that handles root candidates one colour at a
// time inside a parallel_reduce. The start of the signature and several body lines
// (declarations of numRows, numColors, colors, aggSize, numNeighbors, ...) are not visible.
// numNonAggregatedNodes is an in/out reference that is updated after each colour.
183 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
184 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
185 bool matchMLbehavior = params.get<
bool>(
"aggregation: match ML phase2a");
// Rank on the graph's communicator; written into procWinner for newly aggregated nodes.
188 const int myRank = graph.
GetComm()->getRank();
// Read-write device local views of the node -> aggregate-id and node -> winning-rank vectors.
190 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
191 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
// Local copy of the graph object; this copy is what the lambda below uses.
195 auto lclLWGraph = graph;
// Locally aggregated count = row count minus the still-unaggregated count.
197 LO numLocalNodes = numRows;
198 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// The exponent is fixed at 0.5 here, so factor is the square root of the aggregated fraction.
200 const double aggFactor = 0.5;
201 double factor =
static_cast<double>(numLocalAggregated) / (numLocalNodes + 1);
202 factor = pow(factor, aggFactor);
// Rank-0 device view holding the aggregate counter, with a host mirror. The host value
// (assigned on a line not visible here) is copied to the device before the colour loop.
208 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
209 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
210 Kokkos::create_mirror_view(numLocalAggregates);
212 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
// Colours 2 .. numColors are processed in order; colour 1 is not visited by this loop.
216 for (
int color = 2; color < numColors + 1; ++color) {
// Receives the net change of the unaggregated count produced by this colour.
217 LO tmpNumNonAggregatedNodes = 0;
218 Kokkos::parallel_reduce(
219 "Aggregation Phase 2a: loop over each individual color",
220 Kokkos::RangePolicy<execution_space>(0, numRows),
221 KOKKOS_LAMBDA(
const LO rootCandidate, LO& lNumNonAggregatedNodes) {
// Only READY rows carrying the current colour are considered as roots.
222 if (aggStat(rootCandidate) ==
READY &&
223 colors(rootCandidate) == color) {
226 if (matchMLbehavior) {
231 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
// First pass over the neighbours: a neighbour qualifies when it is not the root, is local,
// is READY, and the tentative size is still below the max (loop body not visible).
236 for (
int j = 0; j < neighbors.length; ++j) {
237 LO neigh = neighbors(j);
238 if (neigh != rootCandidate) {
239 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
240 (aggStat(neigh) ==
READY) &&
241 (aggSize < maxNodesPerAggregate)) {
// Accept only if the tentative size strictly exceeds both the minimum size and factor * numNeighbors.
250 if (aggSize > minNodesPerAggregate &&
251 (aggSize > factor * numNeighbors)) {
// Atomically fetch the current counter value as this aggregate's index and increment it.
253 LO aggIndex = Kokkos::
254 atomic_fetch_add(&numLocalAggregates(), 1);
256 LO numAggregated = 0;
// In ML-matching mode the root itself is assigned to the aggregate and counted as aggregated.
258 if (matchMLbehavior) {
261 vertex2AggId(rootCandidate, 0) = aggIndex;
262 procWinner(rootCandidate, 0) = myRank;
264 --lNumNonAggregatedNodes;
// Second pass: assign qualifying neighbours while numAggregated is below aggSize.
267 for (
int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
268 LO neigh = neighbors(neighIdx);
269 if (neigh != rootCandidate) {
270 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
271 (aggStat(neigh) ==
READY) &&
272 (numAggregated < aggSize)) {
274 vertex2AggId(neigh, 0) = aggIndex;
275 procWinner(neigh, 0) = myRank;
278 --lNumNonAggregatedNodes;
// The reduction contributions are decrements, so adding the reduced value lowers the caller's counter.
285 tmpNumNonAggregatedNodes);
286 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
// Bring the device-side aggregate count back to the host mirror.
290 Kokkos::deep_copy(h_numLocalAggregates, numLocalAggregates);
300 LO& numNonAggregatedNodes)
const {
// Excerpt of a Kokkos-based phase-2a routine that works in two kernels per colour: one
// collects the accepted roots, then (after sorting them) a second builds the aggregates.
// The start of the signature and several body lines (declarations of numRows, numColors,
// colors, aggSize, numNeighbors, ...) are not visible, nor is the end of the function.
304 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
305 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
// Rank on the graph's communicator; written into procWinner for newly aggregated nodes.
308 const int myRank = graph.
GetComm()->getRank();
// Read-write device local views of the node -> aggregate-id and node -> winning-rank vectors.
310 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
311 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
// Local copy of the graph object; this copy is what the lambdas below use.
315 auto lclLWGraph = graph;
// Locally aggregated count = size of the procWinner view minus the still-unaggregated count.
317 LO numLocalNodes = procWinner.size();
318 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// The exponent is fixed at 0.5 here, so factor is the square root of the aggregated fraction.
320 const double aggFactor = 0.5;
321 double factor = as<double>(numLocalAggregated) / (numLocalNodes + 1);
322 factor = pow(factor, aggFactor);
// Rank-0 device view holding the aggregate counter, with a host mirror. The host value
// (assigned on a line not visible here) is copied to the device.
324 Kokkos::View<LO, device_type> numLocalAggregates(
"numLocalAggregates");
325 typename Kokkos::View<LO, device_type>::HostMirror h_numLocalAggregates =
326 Kokkos::create_mirror_view(numLocalAggregates);
328 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);
// Scratch list of root ids, sized by the current unaggregated count, plus a rank-0 counter
// of how many roots the current colour produced (with host mirror).
343 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
344 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
345 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
// Colours 1 .. numColors are processed in order.
346 for (
int color = 1; color < numColors + 1; ++color) {
// Push the host value of the root counter to the device (any reset of the host value
// happens on a line not visible here).
348 Kokkos::deep_copy(numNewRoots, h_numNewRoots);
// Kernel 1: find the roots of this colour that pass the acceptance test.
349 Kokkos::parallel_for(
350 "Aggregation Phase 2a: determining new roots of current color",
351 Kokkos::RangePolicy<execution_space>(0, numRows),
352 KOKKOS_LAMBDA(
const LO rootCandidate) {
// Only READY rows carrying the current colour are considered as roots.
353 if (aggStat(rootCandidate) ==
READY &&
354 colors(rootCandidate) == color) {
356 auto neighbors = lclLWGraph.getNeighborVertices(rootCandidate);
// A neighbour qualifies when it is not the root, is local, is READY, and the tentative
// size is still below the max (loop body not visible).
360 for (
int j = 0; j < neighbors.length; ++j) {
361 LO neigh = neighbors(j);
362 if (neigh != rootCandidate) {
363 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
364 aggStat(neigh) ==
READY &&
365 aggSize < maxNodesPerAggregate) {
// Accepted roots atomically claim the next slot of newRoots and store their id there.
373 if (aggSize > minNodesPerAggregate && aggSize > factor * numNeighbors) {
374 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
375 newRoots(newRootIndex) = rootCandidate;
// Fetch the number of roots found for this colour.
379 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
381 if (h_numNewRoots() > 0) {
// Sort the filled part of newRoots. NOTE(review): presumably so aggregate ids do not
// depend on the order in which the atomic slots were claimed - confirm.
383 Kokkos::sort(newRoots, 0, h_numNewRoots());
// Receives the net change of the unaggregated count produced by this colour.
385 LO tmpNumNonAggregatedNodes = 0;
// Kernel 2: one iteration per sorted root builds that root's aggregate.
387 Kokkos::parallel_reduce(
388 "Aggregation Phase 2a: create new aggregates",
389 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
390 KOKKOS_LAMBDA(
const LO newRootIndex, LO& lNumNonAggregatedNodes) {
// The aggregate id is the count before this colour plus the root's position in the sorted list.
391 LO root = newRoots(newRootIndex);
392 LO newAggID = numLocalAggregates() + newRootIndex;
393 auto neighbors = lclLWGraph.getNeighborVertices(root);
396 vertex2AggId(root, 0) = newAggID;
// Assign local, READY neighbours to the new aggregate while it is below the max size.
398 for (
int j = 0; j < neighbors.length; ++j) {
399 LO neigh = neighbors(j);
401 if (lclLWGraph.isLocalNeighborVertex(neigh) &&
402 aggStat(neigh) ==
READY &&
403 aggSize < maxNodesPerAggregate) {
405 vertex2AggId(neigh, 0) = newAggID;
406 procWinner(neigh, 0) = myRank;
// Each new aggregate subtracts its size from the reduction value.
411 lNumNonAggregatedNodes -= aggSize;
// The reduced value is built from subtractions, so adding it lowers the caller's counter.
413 tmpNumNonAggregatedNodes);
414 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
// Advance the host aggregate count by the number of new roots and push it to the device.
415 h_numLocalAggregates() += h_numNewRoots();
416 Kokkos::deep_copy(numLocalAggregates, h_numLocalAggregates);