149 typedef Teuchos::ScalarTraits<SC> STS;
150 typedef typename STS::magnitudeType real_type;
151 typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
152 typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
160 bool doExperimentalWrap = pL.get<
bool>(
"lightweight wrap");
163 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
164 const bool aggregationMayCreateDirichlet = pL.get<
bool>(
"aggregation: dropping may create Dirichlet");
166 RCP<RealValuedMultiVector> Coords;
169 bool use_block_algorithm =
false;
170 LO interleaved_blocksize = as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize"));
171 bool useSignedClassicalRS =
false;
172 bool useSignedClassicalSA =
false;
173 bool generateColoringGraph =
false;
177 typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<
double>(
"aggregation: row sum drop tol"));
179 RCP<LocalOrdinalVector> ghostedBlockNumber;
181 if (algo ==
"distance laplacian") {
185 }
else if (algo ==
"signed classical sa") {
186 useSignedClassicalSA =
true;
189 }
else if (algo ==
"signed classical" || algo ==
"block diagonal colored signed classical" || algo ==
"block diagonal signed classical") {
190 useSignedClassicalRS =
true;
194 RCP<const Import> importer = realA->getCrsGraph()->getImporter();
195 if (!importer.is_null()) {
197 ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
198 ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
200 ghostedBlockNumber = BlockNumber;
203 if (algo ==
"block diagonal colored signed classical")
204 generateColoringGraph =
true;
208 }
else if (algo ==
"block diagonal") {
212 }
else if (algo ==
"block diagonal classical" || algo ==
"block diagonal distance laplacian") {
214 use_block_algorithm =
true;
216 if (algo ==
"block diagonal distance laplacian") {
219 if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
220 LO dim = (LO)OldCoords->getNumVectors();
221 Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
222 for (LO k = 0; k < dim; k++) {
223 ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
224 ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
225 for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
226 LO new_base = i * dim;
227 for (LO j = 0; j < interleaved_blocksize; j++)
228 new_vec[new_base + j] = old_vec[i];
234 algo =
"distance laplacian";
235 }
else if (algo ==
"block diagonal classical") {
246 Array<double> dlap_weights = pL.get<Array<double>>(
"aggregation: distance laplacian directional weights");
247 enum { NO_WEIGHTS = 0,
250 int use_dlap_weights = NO_WEIGHTS;
251 if (algo ==
"distance laplacian") {
252 LO dim = (LO)Coords->getNumVectors();
254 bool non_unity =
false;
255 for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
256 if (dlap_weights[i] != 1.0) {
261 LO blocksize = use_block_algorithm ? as<LO>(pL.get<
int>(
"aggregation: block diagonal: interleaved blocksize")) : 1;
262 if ((LO)dlap_weights.size() == dim)
263 use_dlap_weights = SINGLE_WEIGHTS;
264 else if ((LO)dlap_weights.size() == blocksize * dim)
265 use_dlap_weights = BLOCK_WEIGHTS;
268 "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
286 if (doExperimentalWrap) {
287 TEUCHOS_TEST_FOR_EXCEPTION(
predrop_ != null && algo !=
"classical",
Exceptions::RuntimeError,
"Dropping function must not be provided for \"" << algo <<
"\" algorithm");
288 TEUCHOS_TEST_FOR_EXCEPTION(algo !=
"classical" && algo !=
"distance laplacian" && algo !=
"signed classical",
Exceptions::RuntimeError,
"\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
292 if (pL.get<
bool>(
"aggregation: use ml scaling of drop tol"))
293 threshold = pL.get<
double>(
"aggregation: drop tol") / pow(2.0, currentLevel.
GetLevelID());
295 threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
297 std::string distanceLaplacianAlgoStr = pL.get<std::string>(
"aggregation: distance laplacian algo");
298 std::string classicalAlgoStr = pL.get<std::string>(
"aggregation: classical algo");
299 real_type realThreshold = STS::magnitude(threshold);
303#ifdef HAVE_MUELU_DEBUG
304 int distanceLaplacianCutVerbose = 0;
306#ifdef DJS_READ_ENV_VARIABLES
307 if (getenv(
"MUELU_DROP_TOLERANCE_MODE")) {
308 distanceLaplacianAlgoStr = std::string(getenv(
"MUELU_DROP_TOLERANCE_MODE"));
311 if (getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD")) {
312 auto tmp = atoi(getenv(
"MUELU_DROP_TOLERANCE_THRESHOLD"));
313 realThreshold = 1e-4 * tmp;
316#ifdef HAVE_MUELU_DEBUG
317 if (getenv(
"MUELU_DROP_TOLERANCE_VERBOSE")) {
318 distanceLaplacianCutVerbose = atoi(getenv(
"MUELU_DROP_TOLERANCE_VERBOSE"));
326 if (algo ==
"distance laplacian") {
327 if (distanceLaplacianAlgoStr ==
"default")
329 else if (distanceLaplacianAlgoStr ==
"unscaled cut")
331 else if (distanceLaplacianAlgoStr ==
"scaled cut")
333 else if (distanceLaplacianAlgoStr ==
"scaled cut symmetric")
336 TEUCHOS_TEST_FOR_EXCEPTION(
true,
Exceptions::RuntimeError,
"\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr <<
"\"");
337 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
338 }
else if (algo ==
"classical") {
339 if (classicalAlgoStr ==
"default")
341 else if (classicalAlgoStr ==
"unscaled cut")
343 else if (classicalAlgoStr ==
"scaled cut")
346 TEUCHOS_TEST_FOR_EXCEPTION(
true,
Exceptions::RuntimeError,
"\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr <<
"\"");
347 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\" classical algorithm = \"" << classicalAlgoStr <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
350 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
352 if (((algo ==
"classical") && (classicalAlgoStr.find(
"scaled") != std::string::npos)) || ((algo ==
"distance laplacian") && (distanceLaplacianAlgoStr.find(
"scaled") != std::string::npos)))
353 TEUCHOS_TEST_FOR_EXCEPTION(realThreshold > 1.0,
Exceptions::RuntimeError,
"For cut-drop algorithms, \"aggregation: drop tol\" = " << threshold <<
", needs to be <= 1.0");
355 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
357 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
360 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo !=
defaultAlgo,
Exceptions::RuntimeError,
"\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
361 TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo !=
defaultAlgo,
Exceptions::RuntimeError,
"\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
363 GO numDropped = 0, numTotal = 0;
364 std::string graphType =
"unamalgamated";
382 TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0,
Exceptions::RuntimeError,
"A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
383 const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
386 if (algo ==
"classical") {
393 RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(
predrop_);
395 "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
397 SC newt = predropConstVal->GetThreshold();
398 if (newt != threshold) {
399 GetOStream(
Warnings0) <<
"switching threshold parameter from " << threshold <<
" (list) to " << newt <<
" (user function" << std::endl;
406 if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
408 RCP<LWGraph> graph = rcp(
new LWGraph(A->getCrsGraph(),
"graph of A"));
414 graph->SetBoundaryNodeMap(boundaryNodes);
415 numTotal = A->getLocalNumEntries();
418 GO numLocalBoundaryNodes = 0;
419 GO numGlobalBoundaryNodes = 0;
420 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
421 if (boundaryNodes[i])
422 numLocalBoundaryNodes++;
423 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
424 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
428 Set(currentLevel,
"DofsPerNode", 1);
429 Set(currentLevel,
"Graph", graph);
431 }
else if ((BlockSize == 1 && threshold != STS::zero()) ||
432 (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
433 (BlockSize == 1 && useSignedClassicalRS) ||
434 (BlockSize == 1 && useSignedClassicalSA)) {
440 typename LWGraph::row_type::non_const_type rows(
"rows", A->getLocalNumRows() + 1);
441 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
443 using MT =
typename STS::magnitudeType;
444 RCP<Vector> ghostedDiag;
445 ArrayRCP<const SC> ghostedDiagVals;
446 ArrayRCP<const SC> negMaxOffDiagonal;
448 if (useSignedClassicalRS) {
449 if (ghostedBlockNumber.is_null()) {
451 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
456 negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0);
463 ghostedDiagVals = ghostedDiag->getData(0);
467 if (rowSumTol > 0.) {
468 if (ghostedBlockNumber.is_null()) {
479 ArrayRCP<const LO> g_block_id;
480 if (!ghostedBlockNumber.is_null())
481 g_block_id = ghostedBlockNumber->getData(0);
487 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
488 size_t nnz = A->getNumEntriesInLocalRow(row);
489 bool rowIsDirichlet = boundaryNodes[row];
490 ArrayView<const LO> indices;
491 ArrayView<const SC> vals;
492 A->getLocalRowView(row, indices, vals);
500 if (useSignedClassicalRS) {
502 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
503 LO col = indices[colID];
504 MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
505 MT neg_aij = -STS::real(vals[colID]);
510 if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
511 columns[realnnz++] = col;
516 rows(row + 1) = realnnz;
517 }
else if (useSignedClassicalSA) {
519 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
520 LO col = indices[colID];
522 bool is_nonpositive = STS::real(vals[colID]) <= 0;
523 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
524 MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID]));
530 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
531 columns(realnnz++) = col;
536 rows[row + 1] = realnnz;
539 for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
540 LO col = indices[colID];
541 MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]);
542 MT aij = STS::magnitude(vals[colID] * vals[colID]);
544 if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
545 columns(realnnz++) = col;
550 rows(row + 1) = realnnz;
556 using ExecSpace =
typename Node::execution_space;
557 using TeamPol = Kokkos::TeamPolicy<ExecSpace>;
558 using TeamMem =
typename TeamPol::member_type;
559 using ATS = Kokkos::ArithTraits<Scalar>;
560 using impl_scalar_type =
typename ATS::val_type;
561 using implATS = Kokkos::ArithTraits<impl_scalar_type>;
564 auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0);
565 auto thresholdKokkos =
static_cast<impl_scalar_type
>(threshold);
566 auto realThresholdKokkos = implATS::magnitude(thresholdKokkos);
567 auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns);
569 auto A_device = A->getLocalMatrixDevice();
570 RCP<LWGraph> graph = rcp(
new LWGraph(A->getCrsGraph(),
"graph of A"));
571 RCP<const Import> importer = A->getCrsGraph()->getImporter();
572 RCP<LocalOrdinalVector> boundaryNodesVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetDomainMap());
573 RCP<LocalOrdinalVector> boundaryColumnVector;
574 for (
size_t i = 0; i < graph->GetNodeNumVertices(); i++) {
575 boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
577 if (!importer.is_null()) {
578 boundaryColumnVector = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(graph->GetImportMap());
579 boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
581 boundaryColumnVector = boundaryNodesVector;
583 auto boundaryColumn = boundaryColumnVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
584 auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0);
586 Kokkos::View<LO*, ExecSpace> rownnzView(
"rownnzView", A_device.numRows());
587 auto drop_views = Kokkos::View<bool*, ExecSpace>(
"drop_views", A_device.nnz());
588 auto index_views = Kokkos::View<size_t*, ExecSpace>(
"index_views", A_device.nnz());
590 Kokkos::parallel_reduce(
591 "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(
const TeamMem& teamMember, LO& globalnnz, GO& totalDropped) {
592 LO row = teamMember.league_rank();
593 auto rowView = A_device.rowConst(row);
594 size_t nnz = rowView.length;
596 auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
597 auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1)));
600 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](
const LO colID) {
601 index_view(colID) = colID;
602 LO col = rowView.colidx(colID);
605 if (row == col || boundary(col)) {
606 drop_view(colID) =
true;
608 drop_view(colID) =
false;
612 size_t dropStart = nnz;
615 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
616 if (drop_view(x) || drop_view(y)) {
617 return drop_view(x) < drop_view(y);
619 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
620 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
621 return x_aij > y_aij;
626 Kokkos::parallel_reduce(
627 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
628 auto const& x = index_view(i - 1);
629 auto const& y = index_view(i);
630 typename implATS::magnitudeType x_aij = 0;
631 typename implATS::magnitudeType y_aij = 0;
633 x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
636 y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
639 if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) {
645 Kokkos::Min<size_t>(dropStart));
648 Kokkos::Experimental::sort_team(teamMember, index_view, [=](
size_t& x,
size_t& y) ->
bool {
649 if (drop_view(x) || drop_view(y)) {
650 return drop_view(x) < drop_view(y);
652 auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
653 auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
654 auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
655 auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
656 return (x_aij / x_aiiajj) > (y_aij / y_aiiajj);
661 Kokkos::parallel_reduce(
662 Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](
size_t i,
size_t& min) {
663 auto const& x = index_view(i - 1);
664 auto const& y = index_view(i);
665 typename implATS::magnitudeType x_val = 0;
666 typename implATS::magnitudeType y_val = 0;
668 typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x));
669 typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row));
670 x_val = x_aij / x_aiiajj;
673 typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y));
674 typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row));
675 y_val = y_aij / y_aiiajj;
678 if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) {
684 Kokkos::Min<size_t>(dropStart));
688 if (dropStart < nnz) {
689 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](
size_t i) {
690 drop_view(index_view(i)) =
true;
696 Kokkos::parallel_reduce(
697 Kokkos::TeamThreadRange(teamMember, nnz), [=](
const size_t idxID, LO& keep, GO& drop) {
698 LO col = rowView.colidx(idxID);
700 if (row == col || !drop_view(idxID)) {
701 columnsDevice(A_device.graph.row_map(row) + idxID) = col;
704 columnsDevice(A_device.graph.row_map(row) + idxID) = -1;
711 totalDropped += rowDropped;
712 rownnzView(row) = rownnz;
714 realnnz, numDropped);
717 Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1);
718 Kokkos::deep_copy(columns, columnsDevice);
721 auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(), rows);
722 Kokkos::parallel_scan(
723 Kokkos::RangePolicy<ExecSpace>(0, A_device.numRows()), KOKKOS_LAMBDA(
const int i, LO& partial_sum,
bool is_final) {
724 partial_sum += rownnzView(i);
725 if (is_final) rowsDevice(i + 1) = partial_sum;
727 Kokkos::deep_copy(rows, rowsDevice);
730 numTotal = A->getLocalNumEntries();
732 if (aggregationMayCreateDirichlet) {
734 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
735 if (rows[row + 1] - rows[row] <= 1)
736 boundaryNodes[row] =
true;
740 RCP<LWGraph> graph = rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(),
"thresholded graph of A"));
741 graph->SetBoundaryNodeMap(boundaryNodes);
743 GO numLocalBoundaryNodes = 0;
744 GO numGlobalBoundaryNodes = 0;
745 for (
size_t i = 0; i < boundaryNodes.size(); ++i)
746 if (boundaryNodes(i))
747 numLocalBoundaryNodes++;
748 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
749 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
752 Set(currentLevel,
"Graph", graph);
753 Set(currentLevel,
"DofsPerNode", 1);
756 if (generateColoringGraph) {
757 RCP<LWGraph> colorGraph;
758 RCP<const Import> importer = A->getCrsGraph()->getImporter();
760 Set(currentLevel,
"Coloring Graph", colorGraph);
764 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write(
"m_regular_graph." + std::to_string(currentLevel.
GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
765 Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write(
"m_color_graph." + std::to_string(currentLevel.
GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
780 }
else if (BlockSize > 1 && threshold == STS::zero()) {
782 const RCP<const Map> rowMap = A->getRowMap();
783 const RCP<const Map> colMap = A->getColMap();
785 graphType =
"amalgamated";
791 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
792 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
793 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
794 Array<LO> colTranslation = *(amalInfo->getColTranslation());
797 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
800 typename LWGraph::row_type::non_const_type rows(
"rows", numRows + 1);
801 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
804 Kokkos::deep_copy(amalgBoundaryNodes,
false);
810 ArrayRCP<bool> pointBoundaryNodes;
816 LO blkSize = A->GetFixedBlockSize();
818 LO blkPartSize = A->GetFixedBlockSize();
819 if (A->IsView(
"stridedMaps") ==
true) {
820 Teuchos::RCP<const Map> myMap = A->getRowMap(
"stridedMaps");
821 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
823 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
824 blkId = strMap->getStridedBlockId();
826 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
832 Array<LO> indicesExtra;
833 for (LO row = 0; row < numRows; row++) {
834 ArrayView<const LO> indices;
835 indicesExtra.resize(0);
843 bool isBoundary =
false;
844 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
845 for (LO j = 0; j < blkPartSize; j++) {
846 if (pointBoundaryNodes[row * blkPartSize + j]) {
853 for (LO j = 0; j < blkPartSize; j++) {
854 if (!pointBoundaryNodes[row * blkPartSize + j]) {
864 MergeRows(*A, row, indicesExtra, colTranslation);
866 indicesExtra.push_back(row);
867 indices = indicesExtra;
868 numTotal += indices.size();
872 LO nnz = indices.size(), rownnz = 0;
873 for (LO colID = 0; colID < nnz; colID++) {
874 LO col = indices[colID];
875 columns(realnnz++) = col;
886 amalgBoundaryNodes[row] =
true;
888 rows(row + 1) = realnnz;
891 RCP<LWGraph> graph = rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
892 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
895 GO numLocalBoundaryNodes = 0;
896 GO numGlobalBoundaryNodes = 0;
898 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
899 if (amalgBoundaryNodes(i))
900 numLocalBoundaryNodes++;
902 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
903 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
905 <<
" agglomerated Dirichlet nodes" << std::endl;
908 Set(currentLevel,
"Graph", graph);
909 Set(currentLevel,
"DofsPerNode", blkSize);
911 }
else if (BlockSize > 1 && threshold != STS::zero()) {
913 const RCP<const Map> rowMap = A->getRowMap();
914 const RCP<const Map> colMap = A->getColMap();
915 graphType =
"amalgamated";
921 RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
922 RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
923 Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
924 Array<LO> colTranslation = *(amalInfo->getColTranslation());
927 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
930 typename LWGraph::row_type::non_const_type rows(
"rows", numRows + 1);
931 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
934 Kokkos::deep_copy(amalgBoundaryNodes,
false);
945 LO blkSize = A->GetFixedBlockSize();
947 LO blkPartSize = A->GetFixedBlockSize();
948 if (A->IsView(
"stridedMaps") ==
true) {
949 Teuchos::RCP<const Map> myMap = A->getRowMap(
"stridedMaps");
950 Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
952 blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
953 blkId = strMap->getStridedBlockId();
955 blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
960 const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
965 Array<LO> indicesExtra;
966 for (LO row = 0; row < numRows; row++) {
967 ArrayView<const LO> indices;
968 indicesExtra.resize(0);
976 bool isBoundary =
false;
977 if (pL.get<
bool>(
"aggregation: greedy Dirichlet") ==
true) {
978 for (LO j = 0; j < blkPartSize; j++) {
979 if (pointBoundaryNodes[row * blkPartSize + j]) {
986 for (LO j = 0; j < blkPartSize; j++) {
987 if (!pointBoundaryNodes[row * blkPartSize + j]) {
999 indicesExtra.push_back(row);
1000 indices = indicesExtra;
1001 numTotal += indices.size();
1005 LO nnz = indices.size(), rownnz = 0;
1006 for (LO colID = 0; colID < nnz; colID++) {
1007 LO col = indices[colID];
1008 columns[realnnz++] = col;
1019 amalgBoundaryNodes[row] =
true;
1021 rows[row + 1] = realnnz;
1025 RCP<LWGraph> graph = rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1026 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1029 GO numLocalBoundaryNodes = 0;
1030 GO numGlobalBoundaryNodes = 0;
1032 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1033 if (amalgBoundaryNodes(i))
1034 numLocalBoundaryNodes++;
1036 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1037 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1039 <<
" agglomerated Dirichlet nodes" << std::endl;
1042 Set(currentLevel,
"Graph", graph);
1043 Set(currentLevel,
"DofsPerNode", blkSize);
1046 }
else if (algo ==
"distance laplacian") {
1047 LO blkSize = A->GetFixedBlockSize();
1048 GO indexBase = A->getRowMap()->getIndexBase();
1063 if ((blkSize == 1) && (threshold == STS::zero())) {
1065 RCP<LWGraph> graph = rcp(
new LWGraph(A->getCrsGraph(),
"graph of A"));
1066 graph->SetBoundaryNodeMap(pointBoundaryNodes);
1067 graphType =
"unamalgamated";
1068 numTotal = A->getLocalNumEntries();
1071 GO numLocalBoundaryNodes = 0;
1072 GO numGlobalBoundaryNodes = 0;
1073 for (
size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1074 if (pointBoundaryNodes(i))
1075 numLocalBoundaryNodes++;
1076 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1077 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1078 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1081 Set(currentLevel,
"DofsPerNode", blkSize);
1082 Set(currentLevel,
"Graph", graph);
1097 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(),
Exceptions::Incompatible,
1098 "Coordinate vector length (" << Coords->getLocalLength() <<
") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() <<
") by modulo block size (" << blkSize <<
").");
1100 const RCP<const Map> colMap = A->getColMap();
1101 RCP<const Map> uniqueMap, nonUniqueMap;
1102 Array<LO> colTranslation;
1104 uniqueMap = A->getRowMap();
1105 nonUniqueMap = A->getColMap();
1106 graphType =
"unamalgamated";
1109 uniqueMap = Coords->getMap();
1111 "Different index bases for matrix and coordinates");
1115 graphType =
"amalgamated";
1117 LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1119 RCP<RealValuedMultiVector> ghostedCoords;
1120 RCP<Vector> ghostedLaplDiag;
1121 Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1122 if (threshold != STS::zero()) {
1124 RCP<const Import> importer;
1127 if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1129 importer = realA->getCrsGraph()->getImporter();
1132 importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1135 ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1138 ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1142 RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1143 Array<LO> indicesExtra;
1144 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1145 if (threshold != STS::zero()) {
1146 const size_t numVectors = ghostedCoords->getNumVectors();
1147 coordData.reserve(numVectors);
1148 for (
size_t j = 0; j < numVectors; j++) {
1149 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1150 coordData.push_back(tmpData);
1155 ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1156 for (LO row = 0; row < numRows; row++) {
1157 ArrayView<const LO> indices;
1160 ArrayView<const SC> vals;
1161 A->getLocalRowView(row, indices, vals);
1165 indicesExtra.resize(0);
1166 MergeRows(*A, row, indicesExtra, colTranslation);
1167 indices = indicesExtra;
1170 LO nnz = indices.size();
1171 bool haveAddedToDiag =
false;
1172 for (LO colID = 0; colID < nnz; colID++) {
1173 const LO col = indices[colID];
1176 if (use_dlap_weights == SINGLE_WEIGHTS) {
1181 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1182 int block_id = row % interleaved_blocksize;
1183 int block_start = block_id * interleaved_blocksize;
1189 haveAddedToDiag =
true;
1194 if (!haveAddedToDiag)
1195 localLaplDiagData[row] = STS::squareroot(STS::rmax());
1200 ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1201 ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1202 ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1206 GetOStream(
Runtime0) <<
"Skipping distance laplacian construction due to 0 threshold" << std::endl;
1212 typename LWGraph::row_type::non_const_type rows(
"rows", numRows + 1);
1213 typename LWGraph::entries_type::non_const_type columns(
"columns", A->getLocalNumEntries());
1215#ifdef HAVE_MUELU_DEBUG
1217 for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1221 ArrayRCP<LO> rows_stop;
1222 bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo ==
scaled_cut_symmetric;
1225 rows_stop.resize(numRows);
1228 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1233 Array<LO> indicesExtra;
1236 Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1237 if (threshold != STS::zero()) {
1238 const size_t numVectors = ghostedCoords->getNumVectors();
1239 coordData.reserve(numVectors);
1240 for (
size_t j = 0; j < numVectors; j++) {
1241 Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1242 coordData.push_back(tmpData);
1246 ArrayView<const SC> vals;
1247 for (LO row = 0; row < numRows; row++) {
1248 ArrayView<const LO> indices;
1249 indicesExtra.resize(0);
1250 bool isBoundary =
false;
1254 A->getLocalRowView(row, indices, vals);
1255 isBoundary = pointBoundaryNodes[row];
1259 for (LO j = 0; j < blkSize; j++) {
1260 if (!pointBoundaryNodes[row * blkSize + j]) {
1268 MergeRows(*A, row, indicesExtra, colTranslation);
1270 indicesExtra.push_back(row);
1271 indices = indicesExtra;
1273 numTotal += indices.size();
1275 LO nnz = indices.size(), rownnz = 0;
1277 if (use_stop_array) {
1278 rows(row + 1) = rows(row) + nnz;
1279 realnnz = rows(row);
1282 if (threshold != STS::zero()) {
1286 for (LO colID = 0; colID < nnz; colID++) {
1287 LO col = indices[colID];
1290 columns(realnnz++) = col;
1296 if (isBoundary)
continue;
1299 if (use_dlap_weights == SINGLE_WEIGHTS) {
1301 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1302 int block_id = row % interleaved_blocksize;
1303 int block_start = block_id * interleaved_blocksize;
1308 real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1309 real_type aij = STS::magnitude(laplVal * laplVal);
1312 columns(realnnz++) = col;
1321 std::vector<DropTol> drop_vec;
1322 drop_vec.reserve(nnz);
1323 const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1324 const real_type one = Teuchos::ScalarTraits<real_type>::one();
1327 for (LO colID = 0; colID < nnz; colID++) {
1328 LO col = indices[colID];
1331 drop_vec.emplace_back(zero, one, colID,
false);
1335 if (isBoundary)
continue;
1338 if (use_dlap_weights == SINGLE_WEIGHTS) {
1340 }
else if (use_dlap_weights == BLOCK_WEIGHTS) {
1341 int block_id = row % interleaved_blocksize;
1342 int block_start = block_id * interleaved_blocksize;
1348 real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1349 real_type aij = STS::magnitude(laplVal * laplVal);
1351 drop_vec.emplace_back(aij, aiiajj, colID,
false);
1354 const size_t n = drop_vec.size();
1357 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1358 return a.val > b.val;
1362 for (
size_t i = 1; i < n; ++i) {
1364 auto const& x = drop_vec[i - 1];
1365 auto const& y = drop_vec[i];
1368 if (realThreshold * realThreshold * a > b) {
1370#ifdef HAVE_MUELU_DEBUG
1371 if (distanceLaplacianCutVerbose) {
1372 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1377 drop_vec[i].drop = drop;
1380 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1381 return a.val / a.diag > b.val / b.diag;
1385 for (
size_t i = 1; i < n; ++i) {
1387 auto const& x = drop_vec[i - 1];
1388 auto const& y = drop_vec[i];
1389 auto a = x.val / x.diag;
1390 auto b = y.val / y.diag;
1391 if (realThreshold * realThreshold * a > b) {
1393#ifdef HAVE_MUELU_DEBUG
1394 if (distanceLaplacianCutVerbose) {
1395 std::cout <<
"DJS: KEEP, N, ROW: " << i + 1 <<
", " << n <<
", " << row << std::endl;
1400 drop_vec[i].drop = drop;
1404 std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol
const& a, DropTol
const& b) {
1405 return a.col < b.col;
1408 for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1409 LO col = indices[drop_vec[idxID].col];
1413 columns(realnnz++) = col;
1419 if (!drop_vec[idxID].drop) {
1420 columns(realnnz++) = col;
1431 for (LO colID = 0; colID < nnz; colID++) {
1432 LO col = indices[colID];
1433 columns(realnnz++) = col;
1445 amalgBoundaryNodes[row] =
true;
1449 rows_stop[row] = rownnz + rows[row];
1451 rows[row + 1] = realnnz;
1456 if (use_stop_array) {
1459 for (LO row = 0; row < numRows; row++) {
1460 for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1461 LO col = columns[colidx];
1462 if (col >= numRows)
continue;
1465 for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1466 if (columns[t_col] == row)
1471 if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1472 LO new_idx = rows_stop[col];
1474 columns[new_idx] = row;
1482 LO current_start = 0;
1483 for (LO row = 0; row < numRows; row++) {
1484 LO old_start = current_start;
1485 for (LO col = rows(row); col < rows_stop[row]; col++) {
1486 if (current_start != col) {
1487 columns(current_start) = columns(col);
1491 rows[row] = old_start;
1493 rows(numRows) = realnnz = current_start;
1499 graph = rcp(
new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
1500 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1504 GO numLocalBoundaryNodes = 0;
1505 GO numGlobalBoundaryNodes = 0;
1507 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1508 if (amalgBoundaryNodes(i))
1509 numLocalBoundaryNodes++;
1511 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1512 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1514 <<
" using threshold " << dirichletThreshold << std::endl;
1517 Set(currentLevel,
"Graph", graph);
1518 Set(currentLevel,
"DofsPerNode", blkSize);
1523 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1524 GO numGlobalTotal, numGlobalDropped;
1527 GetOStream(
Statistics1) <<
"Number of dropped entries in " << graphType <<
" matrix graph: " << numGlobalDropped <<
"/" << numGlobalTotal;
1528 if (numGlobalTotal != 0)
1529 GetOStream(
Statistics1) <<
" (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) <<
"%)";
1536 SC threshold = as<SC>(pL.get<
double>(
"aggregation: drop tol"));
1540 <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
1541 Set<bool>(currentLevel,
"Filtering", (threshold != STS::zero()));
1543 RCP<const Map> rowMap = A->getRowMap();
1544 RCP<const Map> colMap = A->getColMap();
1547 GO indexBase = rowMap->getIndexBase();
1551 if (A->IsView(
"stridedMaps") &&
1552 Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap(
"stridedMaps")) != Teuchos::null) {
1553 Xpetra::viewLabel_t oldView = A->SwitchToView(
"stridedMaps");
1554 RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1555 TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,
Exceptions::BadCast,
"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1556 blockdim = strMap->getFixedBlockSize();
1557 offset = strMap->getOffset();
1558 oldView = A->SwitchToView(oldView);
1560 <<
" found blockdim=" << blockdim <<
" from strided maps. offset=" << offset << std::endl;
1562 GetOStream(
Statistics1) <<
"CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1566 RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1567 GetOStream(
Statistics1) <<
"CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() <<
"/" << nodeMap->getGlobalNumElements() <<
" elements" << std::endl;
1570 RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1572 LO numRows = A->getRowMap()->getLocalNumElements();
1573 LO numNodes = nodeMap->getLocalNumElements();
1575 Kokkos::deep_copy(amalgBoundaryNodes,
false);
1576 const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0);
1577 bool bIsDiagonalEntry =
false;
1582 for (LO row = 0; row < numRows; row++) {
1584 GO grid = rowMap->getGlobalElement(row);
1587 bIsDiagonalEntry =
false;
1592 size_t nnz = A->getNumEntriesInLocalRow(row);
1593 Teuchos::ArrayView<const LO> indices;
1594 Teuchos::ArrayView<const SC> vals;
1595 A->getLocalRowView(row, indices, vals);
1597 RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(
new std::vector<GO>);
1599 for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1600 GO gcid = colMap->getGlobalElement(indices[col]);
1602 if (vals[col] != STS::zero()) {
1604 cnodeIds->push_back(cnodeId);
1606 if (grid == gcid) bIsDiagonalEntry =
true;
1610 if (realnnz == 1 && bIsDiagonalEntry ==
true) {
1611 LO lNodeId = nodeMap->getLocalElement(nodeId);
1612 numberDirichletRowsPerNode[lNodeId] += 1;
1613 if (numberDirichletRowsPerNode[lNodeId] == blockdim)
1614 amalgBoundaryNodes[lNodeId] =
true;
1617 Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1619 if (arr_cnodeIds.size() > 0)
1620 crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1623 crsGraph->fillComplete(nodeMap, nodeMap);
1626 RCP<LWGraph> graph = rcp(
new LWGraph(crsGraph,
"amalgamated graph of A"));
1629 graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1632 GO numLocalBoundaryNodes = 0;
1633 GO numGlobalBoundaryNodes = 0;
1634 for (
size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1635 if (amalgBoundaryNodes(i))
1636 numLocalBoundaryNodes++;
1637 RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1638 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1639 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
1644 Set(currentLevel,
"DofsPerNode", blockdim);
1645 Set(currentLevel,
"Graph", graph);