Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Details_AdditiveSchwarzFilter_def.hpp
1// @HEADER
2// *****************************************************************************
3// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4//
5// Copyright 2009 NTESS and the Ifpack2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
11#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
12
13#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
14#include "KokkosKernels_Sorting.hpp"
15#include "KokkosSparse_SortCrs.hpp"
16#include "Kokkos_Bitset.hpp"
17
18namespace Ifpack2
19{
20namespace Details
21{
22
23template<class MatrixType>
24AdditiveSchwarzFilter<MatrixType>::
25AdditiveSchwarzFilter(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
26 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
27 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
28 bool filterSingletons)
29{
30 setup(A_unfiltered, perm, reverseperm, filterSingletons);
31}
32
33template<class MatrixType>
34void AdditiveSchwarzFilter<MatrixType>::
35setup(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
36 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
37 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
38 bool filterSingletons)
39{
40 using Teuchos::RCP;
41 using Teuchos::rcp;
42 using Teuchos::rcp_dynamic_cast;
43 //Check that A is an instance of allowed types, either:
44 // - Tpetra::CrsMatrix
45 // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_)
46 TEUCHOS_TEST_FOR_EXCEPTION(
47 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
48 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
49 std::invalid_argument,
50 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
51 A_unfiltered_ = A_unfiltered;
52 local_matrix_type A_local, A_halo;
53 bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
54 if(haveHalo)
55 {
56 auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
57 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
58 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
59 }
60 else
61 {
62 auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
63 A_local = A_crs->getLocalMatrixDevice();
64 //leave A_halo empty in this case
65 }
66 //Check that perm and reversePerm are the same length and match the number of local rows in A
67 TEUCHOS_TEST_FOR_EXCEPTION(
68 perm.size() != reverseperm.size(),
69 std::invalid_argument,
70 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
71 TEUCHOS_TEST_FOR_EXCEPTION(
72 (size_t) perm.size() != (size_t) A_unfiltered_->getLocalNumRows(),
73 std::invalid_argument,
74 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
75 //First, compute the permutation tables (that exclude singletons, if requested)
76 FilterSingletons_ = filterSingletons;
77 local_ordinal_type numLocalRows;
78 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
79 if(FilterSingletons_)
80 {
81 //Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size)
82 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows);
83 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows);
84 auto singletonsDevice = singletons_.view_device();
85 singletons_.modify_device();
86 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
87 singletonDiagonals_.modify_device();
88 local_ordinal_type numSingletons;
89 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
90 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
91 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons, bool finalPass)
92 {
93 bool isSingleton = true;
94 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
95 if(i < A_local.numRows())
96 {
97 //row i is in original local matrix
98 size_type rowBegin = A_local.graph.row_map(i);
99 size_type rowEnd = A_local.graph.row_map(i + 1);
100 for(size_type j = rowBegin; j < rowEnd; j++)
101 {
102 local_ordinal_type entry = A_local.graph.entries(j);
103 if(entry < totalLocalRows && entry != i)
104 {
105 isSingleton = false;
106 break;
107 }
108 if(finalPass && entry == i)
109 {
110 //note: using a sum to compute the diagonal value, in case entries are not compressed.
111 singletonValue += A_local.values(j);
112 }
113 }
114 }
115 else
116 {
117 //row i is in halo
118 local_ordinal_type row = i - A_local.numRows();
119 size_type rowBegin = A_halo.graph.row_map(row);
120 size_type rowEnd = A_halo.graph.row_map(row + 1);
121 for(size_type j = rowBegin; j < rowEnd; j++)
122 {
123 local_ordinal_type entry = A_halo.graph.entries(j);
124 if(entry < totalLocalRows && entry != i)
125 {
126 isSingleton = false;
127 break;
128 }
129 if(finalPass && entry == i)
130 {
131 singletonValue += A_halo.values(j);
132 }
133 }
134 }
135 if(isSingleton)
136 {
137 if(finalPass)
138 {
139 isSingletonBitset.set(i);
140 singletonsDevice(lnumSingletons) = i;
141 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
142 }
143 lnumSingletons++;
144 }
145 }, numSingletons);
146 numSingletons_ = numSingletons;
147 //Each local row in A_unfiltered is either a singleton or a row in the filtered matrix.
148 numLocalRows = totalLocalRows - numSingletons_;
149 //Using the list of singletons, create the reduced permutations.
150 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows);
151 perm_.modify_device();
152 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows);
153 reverseperm_.modify_device();
154 auto permView = perm_.view_device();
155 auto reversepermView = reverseperm_.view_device();
156 //Get the original inverse permutation on device
157 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows);
158 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
159 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
160 //reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation.
161 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
162 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex, bool finalPass)
163 {
164 local_ordinal_type origRow = origpermView(i);
165 if(!isSingletonBitset.test(origRow))
166 {
167 if(finalPass)
168 {
169 //mark the mapping in both directions between origRow and lindex (a row in filtered A)
170 reversepermView(lindex) = origRow;
171 permView(origRow) = lindex;
172 }
173 lindex++;
174 }
175 else
176 {
177 //origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A.
178 if(finalPass)
179 permView(origRow) = local_ordinal_type(-1);
180 }
181 });
182 perm_.sync_host();
183 reverseperm_.sync_host();
184 }
185 else
186 {
187 //We will keep all the local rows, so use the permutation as is
188 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size());
189 perm_.modify_host();
190 auto permHost = perm_.view_host();
191 for(size_t i = 0; i < (size_t) perm.size(); i++)
192 permHost(i) = perm[i];
193 perm_.sync_device();
194 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size());
195 reverseperm_.modify_host();
196 auto reversepermHost = reverseperm_.view_host();
197 for(size_t i = 0; i < (size_t) reverseperm.size(); i++)
198 reversepermHost(i) = reverseperm[i];
199 reverseperm_.sync_device();
200 numSingletons_ = 0;
201 numLocalRows = totalLocalRows;
202 }
203 auto permView = perm_.view_device();
204 auto reversepermView = reverseperm_.view_device();
205 //Fill the local matrix of A_ (filtered and permuted)
206 //First, construct the rowmap by counting the entries in each row
207 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1);
208 Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
209 KOKKOS_LAMBDA(local_ordinal_type i)
210 {
211 if(i == numLocalRows)
212 {
213 localRowptrs(i) = 0;
214 return;
215 }
216 //Count the entries that will be in filtered row i.
217 //This means entries which correspond to local, non-singleton rows/columns.
218 local_ordinal_type numEntries = 0;
219 local_ordinal_type origRow = reversepermView(i);
220 if(origRow < A_local.numRows())
221 {
222 //row i is in original local matrix
223 size_type rowBegin = A_local.graph.row_map(origRow);
224 size_type rowEnd = A_local.graph.row_map(origRow + 1);
225 for(size_type j = rowBegin; j < rowEnd; j++)
226 {
227 local_ordinal_type entry = A_local.graph.entries(j);
228 if(entry < totalLocalRows && permView(entry) != -1)
229 numEntries++;
230 }
231 }
232 else
233 {
234 //row i is in halo
235 local_ordinal_type row = origRow - A_local.numRows();
236 size_type rowBegin = A_halo.graph.row_map(row);
237 size_type rowEnd = A_halo.graph.row_map(row + 1);
238 for(size_type j = rowBegin; j < rowEnd; j++)
239 {
240 local_ordinal_type entry = A_halo.graph.entries(j);
241 if(entry < totalLocalRows && permView(entry) != -1)
242 numEntries++;
243 }
244 }
245 localRowptrs(i) = numEntries;
246 });
247 //Prefix sum to finish computing the rowptrs
248 size_type numLocalEntries;
249 Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
250 KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries, bool finalPass)
251 {
252 size_type numEnt = localRowptrs(i);
253 if(finalPass)
254 localRowptrs(i) = lnumEntries;
255 lnumEntries += numEnt;
256 }, numLocalEntries);
257 //Allocate and fill local entries and values
258 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries);
259 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries);
260 //Create the local matrix with the uninitialized entries/values, then fill it.
261 local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
262 fillLocalMatrix(localMatrix);
263 //Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map)
264#ifdef HAVE_IFPACK2_DEBUG
265 TEUCHOS_TEST_FOR_EXCEPTION(
266 ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument, "Ifpack2::LocalFilter: "
267 "A's Map pairs are not fitted to each other on Process "
268 << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
269 "communicator. "
270 "This means that LocalFilter does not currently know how to work with A. "
271 "This will change soon. Please see discussion of Bug 5992.");
272#endif // HAVE_IFPACK2_DEBUG
273
274 // Build the local communicator (containing this process only).
275 RCP<const Teuchos::Comm<int> > localComm;
276#ifdef HAVE_MPI
277 localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
278#else
279 localComm = rcp (new Teuchos::SerialComm<int> ());
280#endif // HAVE_MPI
281 //All 4 maps are the same for a local square operator.
282 localMap_ = rcp (new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
283 //Create the inner filtered matrix.
284 auto crsParams = rcp(new Teuchos::ParameterList);
285 crsParams->template set<bool>("sorted", true);
286 //NOTE: this is the fastest way to construct A_ - it's created as fillComplete,
287 //and no communication needs to be done since localMap_ uses a local comm.
288 //It does need to copy the whole local matrix to host when DualViews are constructed
289 //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix.
290 //It also needs to compute local constants (maxNumRowEntries) but this should be a
291 //cheap operation relative to what this constructor already did.
292 A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
293}
294
295template<class MatrixType>
296AdditiveSchwarzFilter<MatrixType>::~AdditiveSchwarzFilter () {}
297
298template<class MatrixType>
299void AdditiveSchwarzFilter<MatrixType>::updateMatrixValues()
300{
301 //Get the local matrix back from A_
302 auto localMatrix = A_->getLocalMatrixDevice();
303 //Copy new values from A_unfiltered to the local matrix, and then reconstruct A_.
304 fillLocalMatrix(localMatrix);
305 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
306}
307
308template<class MatrixType>
309Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::crs_matrix_type>
310AdditiveSchwarzFilter<MatrixType>::getFilteredMatrix() const
311{
312 return A_;
313}
314
315template<class MatrixType>
316void AdditiveSchwarzFilter<MatrixType>::fillLocalMatrix(typename AdditiveSchwarzFilter<MatrixType>::local_matrix_type localMatrix)
317{
318 auto localRowptrs = localMatrix.graph.row_map;
319 auto localEntries = localMatrix.graph.entries;
320 auto localValues = localMatrix.values;
321 auto permView = perm_.view_device();
322 auto reversepermView = reverseperm_.view_device();
323 local_matrix_type A_local, A_halo;
324 bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
325 if(haveHalo)
326 {
327 auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
328 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
329 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
330 }
331 else
332 {
333 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
334 A_local = A_crs->getLocalMatrixDevice();
335 //leave A_halo empty in this case
336 }
337 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
338 //Fill entries and values
339 Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
340 KOKKOS_LAMBDA(local_ordinal_type i)
341 {
342 //Count the entries that will be in filtered row i.
343 //This means entries which correspond to local, non-singleton rows/columns.
344 size_type outRowStart = localRowptrs(i);
345 local_ordinal_type insertPos = 0;
346 local_ordinal_type origRow = reversepermView(i);
347 if(origRow < A_local.numRows())
348 {
349 //row i is in original local matrix
350 size_type rowBegin = A_local.graph.row_map(origRow);
351 size_type rowEnd = A_local.graph.row_map(origRow + 1);
352 for(size_type j = rowBegin; j < rowEnd; j++)
353 {
354 local_ordinal_type entry = A_local.graph.entries(j);
355 if(entry < totalLocalRows)
356 {
357 auto newCol = permView(entry);
358 if(newCol != -1)
359 {
360 localEntries(outRowStart + insertPos) = newCol;
361 localValues(outRowStart + insertPos) = A_local.values(j);
362 insertPos++;
363 }
364 }
365 }
366 }
367 else
368 {
369 //row i is in halo
370 local_ordinal_type row = origRow - A_local.numRows();
371 size_type rowBegin = A_halo.graph.row_map(row);
372 size_type rowEnd = A_halo.graph.row_map(row + 1);
373 for(size_type j = rowBegin; j < rowEnd; j++)
374 {
375 local_ordinal_type entry = A_halo.graph.entries(j);
376 if(entry < totalLocalRows)
377 {
378 auto newCol = permView(entry);
379 if(newCol != -1)
380 {
381 localEntries(outRowStart + insertPos) = newCol;
382 localValues(outRowStart + insertPos) = A_halo.values(j);
383 insertPos++;
384 }
385 }
386 }
387 }
388 });
389 //Sort the matrix, since the reordering can shuffle the entries into any order.
390 KokkosSparse::sort_crs_matrix(localMatrix);
391}
392
393template<class MatrixType>
394Teuchos::RCP<const Teuchos::Comm<int> > AdditiveSchwarzFilter<MatrixType>::getComm() const
395{
396 return localMap_->getComm();
397}
398
399template<class MatrixType>
400Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
401AdditiveSchwarzFilter<MatrixType>::getRowMap() const
402{
403 return localMap_;
404}
405
406template<class MatrixType>
407Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
408AdditiveSchwarzFilter<MatrixType>::getColMap() const
409{
410 return localMap_;
411}
412
413template<class MatrixType>
414Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
415AdditiveSchwarzFilter<MatrixType>::getDomainMap() const
416{
417 return localMap_;
418}
419
420template<class MatrixType>
421Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
422AdditiveSchwarzFilter<MatrixType>::getRangeMap() const
423{
424 return localMap_;
425}
426
427template<class MatrixType>
428Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
429 typename MatrixType::global_ordinal_type,
430 typename MatrixType::node_type> >
431AdditiveSchwarzFilter<MatrixType>::getGraph() const
432{
433 //NOTE BMK 6-22: this is to maintain compatibilty with LocalFilter.
434 //Situations like overlapping AdditiveSchwarz + BlockRelaxation
435 //require the importer of the original distributed graph, even though the
436 //BlockRelaxation is preconditioning a local matrix (A_).
437 return A_unfiltered_->getGraph();
438}
439
440template<class MatrixType>
441typename MatrixType::local_ordinal_type AdditiveSchwarzFilter<MatrixType>::getBlockSize() const
442{
443 return A_->getBlockSize();
444}
445
446template<class MatrixType>
447global_size_t AdditiveSchwarzFilter<MatrixType>::getGlobalNumRows() const
448{
449 return A_->getGlobalNumRows();
450}
451
452template<class MatrixType>
453global_size_t AdditiveSchwarzFilter<MatrixType>::getGlobalNumCols() const
454{
455 return A_->getGlobalNumCols();
456}
457
458template<class MatrixType>
459size_t AdditiveSchwarzFilter<MatrixType>::getLocalNumRows() const
460{
461 return A_->getLocalNumRows();
462}
463
464template<class MatrixType>
465size_t AdditiveSchwarzFilter<MatrixType>::getLocalNumCols() const
466{
467 return A_->getLocalNumCols();
468}
469
470template<class MatrixType>
471typename MatrixType::global_ordinal_type AdditiveSchwarzFilter<MatrixType>::getIndexBase() const
472{
473 return A_->getIndexBase();
474}
475
476template<class MatrixType>
477global_size_t AdditiveSchwarzFilter<MatrixType>::getGlobalNumEntries() const
478{
479 return getLocalNumEntries();
480}
481
482template<class MatrixType>
483size_t AdditiveSchwarzFilter<MatrixType>::getLocalNumEntries() const
484{
485 return A_->getLocalNumEntries();
486}
487
488template<class MatrixType>
489size_t AdditiveSchwarzFilter<MatrixType>::
490getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
491{
492 return A_->getNumEntriesInGlobalRow(globalRow);
493}
494
495template<class MatrixType>
496size_t AdditiveSchwarzFilter<MatrixType>::
497getNumEntriesInLocalRow (local_ordinal_type localRow) const
498{
499 return A_->getNumEntriesInLocalRow(localRow);
500}
501
502template<class MatrixType>
503size_t AdditiveSchwarzFilter<MatrixType>::getGlobalMaxNumRowEntries() const
504{
505 //Use A_unfiltered_ to get a valid upper bound for this.
506 //This lets us support this function without computing global constants on A_.
507 return A_unfiltered_->getGlobalMaxNumRowEntries();
508}
509
510template<class MatrixType>
511size_t AdditiveSchwarzFilter<MatrixType>::getLocalMaxNumRowEntries() const
512{
513 //Use A_unfiltered_ to get a valid upper bound for this
514 return A_unfiltered_->getLocalMaxNumRowEntries();
515}
516
517
518template<class MatrixType>
519bool AdditiveSchwarzFilter<MatrixType>::hasColMap() const
520{
521 return true;
522}
523
524
525template<class MatrixType>
526bool AdditiveSchwarzFilter<MatrixType>::isLocallyIndexed() const
527{
528 return true;
529}
530
531
532template<class MatrixType>
533bool AdditiveSchwarzFilter<MatrixType>::isGloballyIndexed() const
534{
535 return false;
536}
537
538
539template<class MatrixType>
540bool AdditiveSchwarzFilter<MatrixType>::isFillComplete() const
541{
542 return true;
543}
544
545
546template<class MatrixType>
547void AdditiveSchwarzFilter<MatrixType>::
548 getGlobalRowCopy (global_ordinal_type globalRow,
549 nonconst_global_inds_host_view_type &globalInd,
550 nonconst_values_host_view_type &val,
551 size_t& numEntries) const
552{
553 throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
554}
555
556template<class MatrixType>
557void AdditiveSchwarzFilter<MatrixType>::
558getLocalRowCopy (local_ordinal_type LocalRow,
559 nonconst_local_inds_host_view_type &Indices,
560 nonconst_values_host_view_type &Values,
561 size_t& NumEntries) const
562
563{
564 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
565}
566
567template<class MatrixType>
568void AdditiveSchwarzFilter<MatrixType>::getGlobalRowView(global_ordinal_type /* GlobalRow */,
569 global_inds_host_view_type &/*indices*/,
570 values_host_view_type &/*values*/) const
571{
572 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
573}
574
575template<class MatrixType>
576void AdditiveSchwarzFilter<MatrixType>::getLocalRowView(local_ordinal_type LocalRow,
577 local_inds_host_view_type & indices,
578 values_host_view_type & values) const
579{
580 A_->getLocalRowView(LocalRow, indices, values);
581}
582
583template<class MatrixType>
584void AdditiveSchwarzFilter<MatrixType>::
585getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag) const
586{
587 // This is somewhat dubious as to how the maps match.
588 A_->getLocalDiagCopy(diag);
589}
590
591template<class MatrixType>
592void AdditiveSchwarzFilter<MatrixType>::leftScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
593{
594 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
595}
596
597template<class MatrixType>
598void AdditiveSchwarzFilter<MatrixType>::rightScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
599{
600 throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
601}
602
603template<class MatrixType>
604void AdditiveSchwarzFilter<MatrixType>::
605apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
606 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
607 Teuchos::ETransp mode,
608 scalar_type alpha,
609 scalar_type beta) const
610{
611 A_->apply(X, Y, mode, alpha, beta);
612}
613
614template<class MatrixType>
615void AdditiveSchwarzFilter<MatrixType>::
616CreateReducedProblem(
617 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
618 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
619 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB) const
620{
621 //NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews,
622 //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
623 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
624 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
625 local_ordinal_type numVecs = OverlappingB.getNumVectors();
626 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
627 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
628 auto singletons = singletons_.view_device();
629 auto singletonDiags = singletonDiagonals_.view_device();
630 auto revperm = reverseperm_.view_device();
631 //First, solve the singletons.
632 {
633 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
634 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
635 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
636 {
637 local_ordinal_type row = singletons(singletonIndex);
638 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
639 });
640 }
641 //Next, permute OverlappingB to ReducedReorderedB.
642 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
643 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
644 {
645 reducedB(row, col) = b(revperm(row), col);
646 });
647 //Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction)
648 //This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity.
649 //Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB.
650 if(numSingletons_)
651 {
652 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false);
653 A_unfiltered_->apply(OverlappingY, singletonUpdates);
654 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
655 // auto revperm = reverseperm_.view_device();
656 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
657 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
658 {
659 local_ordinal_type origRow = revperm(row);
660 reducedB(row, col) -= updatesView(origRow, col);
661 });
662 }
663}
664
665template<class MatrixType>
666void AdditiveSchwarzFilter<MatrixType>::UpdateLHS(
667 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
668 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY) const
669{
670 //NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews,
671 //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
672 TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
673 std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
674 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
675 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
676 auto revperm = reverseperm_.view_device();
677 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
678 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
679 {
680 y(revperm(row), col) = reducedY(row, col);
681 });
682}
683
684template<class MatrixType>
685bool AdditiveSchwarzFilter<MatrixType>::hasTransposeApply() const
686{
687 return true;
688}
689
690
691template<class MatrixType>
692bool AdditiveSchwarzFilter<MatrixType>::supportsRowViews() const
693{
694 return true;
695}
696
697template<class MatrixType>
698typename AdditiveSchwarzFilter<MatrixType>::mag_type AdditiveSchwarzFilter<MatrixType>::getFrobeniusNorm() const
699{
700 // Reordering doesn't change the Frobenius norm.
701 return A_->getFrobeniusNorm ();
702}
703
704template<class MatrixType>
705bool
706AdditiveSchwarzFilter<MatrixType>::
707mapPairsAreFitted (const row_matrix_type& A)
708{
709 const map_type& rangeMap = * (A.getRangeMap ());
710 const map_type& rowMap = * (A.getRowMap ());
711 const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
712
713 const map_type& domainMap = * (A.getDomainMap ());
714 const map_type& columnMap = * (A.getColMap ());
715 const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
716
717 //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
718 //This means that it can return different values on different ranks. This can cause MPI to hang,
719 //even though it's supposed to terminate globally when any single rank does.
720 //
721 //This function doesn't need to be fast since it's debug-only code.
722 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
723 int globalSuccess;
724
725 Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
726
727 return globalSuccess == 1;
728}
729
730
731template<class MatrixType>
732bool
733AdditiveSchwarzFilter<MatrixType>::
734mapPairIsFitted (const map_type& map1, const map_type& map2)
735{
736 return map1.isLocallyFitted (map2);
737}
738
739
740}} // namespace Ifpack2::Details
741
742#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
743 template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
744
745#endif
746
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:41