32 Teuchos::ETransp mode,
35 using impl_scalar_type =
typename Kokkos::ArithTraits<Scalar>::val_type;
36 impl_scalar_type implAlpha = alpha;
42 typename Aggregates::aggregates_sizes_type::const_type aggSizes =
aggregates_->ComputeAggregateSizes();
44 auto kokkos_view_X = X.getDeviceLocalView(Xpetra::Access::ReadOnly);
45 auto kokkos_view_Y = Y.getDeviceLocalView(Xpetra::Access::ReadWrite);
46 LO numCols = kokkos_view_X.extent(1);
48 if (mode == Teuchos::TRANS) {
50 auto vertex2AggIdView = vertex2AggId->getDeviceLocalView(Xpetra::Access::ReadOnly);
51 LO numNodes = kokkos_view_X.extent(0);
56 "MueLu:MatrixFreeTentativeR_kokkos:apply",
md_range_type({0, 0}, {numCols, numNodes}),
57 KOKKOS_LAMBDA(
const int colIdx,
const int NodeIdx) {
58 LO aggIdx = vertex2AggIdView(NodeIdx, 0);
60 Kokkos::atomic_add(&kokkos_view_Y(aggIdx, colIdx), implAlpha * kokkos_view_X(NodeIdx, colIdx) / Kokkos::sqrt(aggSizes(aggIdx)));
64 const auto vertex2Agg =
aggregates_->GetVertex2AggId();
65 auto vertex2AggView = vertex2Agg->getDeviceLocalView(Xpetra::Access::ReadOnly);
66 LO numNodes = kokkos_view_Y.extent(0);
71 "MueLu:MatrixFreeTentativeP:apply",
md_range_type({0, 0}, {numCols, numNodes}),
72 KOKKOS_LAMBDA(
const int colIdx,
const int fineIdx) {
73 LO aggIdx = vertex2AggView(fineIdx, 0);
74 kokkos_view_Y(fineIdx, colIdx) += implAlpha * kokkos_view_X(aggIdx, colIdx) / Kokkos::sqrt(aggSizes(aggIdx));