Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_ShyLUBasker_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
18
19
20#ifndef AMESOS2_SHYLUBASKER_DEF_HPP
21#define AMESOS2_SHYLUBASKER_DEF_HPP
22
23#include <Teuchos_Tuple.hpp>
24#include <Teuchos_ParameterList.hpp>
25#include <Teuchos_StandardParameterEntryValidators.hpp>
26
29
30namespace Amesos2 {
31
32
33template <class Matrix, class Vector>
34ShyLUBasker<Matrix,Vector>::ShyLUBasker(
35 Teuchos::RCP<const Matrix> A,
36 Teuchos::RCP<Vector> X,
37 Teuchos::RCP<const Vector> B )
38 : SolverCore<Amesos2::ShyLUBasker,Matrix,Vector>(A, X, B)
39 , is_contiguous_(true)
40 , use_gather_(true)
41{
42
43 //Nothing
44
45 // Override some default options
46 // TODO: use data_ here to init
47#if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
48 /*
49 static_assert(std::is_same<kokkos_exe,Kokkos::OpenMP>::value,
50 "Kokkos node type not supported by experimental ShyLUBasker Amesos2");
51 */
52 ShyLUbasker = new ::BaskerNS::BaskerTrilinosInterface<local_ordinal_type, shylubasker_dtype, Exe_Space>();
53 ShyLUbasker->Options.no_pivot = BASKER_FALSE;
54 ShyLUbasker->Options.static_delayed_pivot = 0;
55 ShyLUbasker->Options.symmetric = BASKER_FALSE;
56 ShyLUbasker->Options.realloc = BASKER_TRUE;
57 ShyLUbasker->Options.verbose = BASKER_FALSE;
58 ShyLUbasker->Options.prune = BASKER_TRUE;
59 ShyLUbasker->Options.btf_matching = 2; // use cardinary matching from Trilinos, globally
60 ShyLUbasker->Options.blk_matching = 1; // use max-weight matching from Basker on each diagonal block
61 ShyLUbasker->Options.matrix_scaling = 0; // use matrix scaling on a big A block
62 ShyLUbasker->Options.min_block_size = 0; // no merging small blocks
63 ShyLUbasker->Options.amd_dom = BASKER_TRUE; // use block-wise AMD
64 ShyLUbasker->Options.use_metis = BASKER_TRUE; // use scotch/metis for ND (TODO: should METIS optional?)
65 ShyLUbasker->Options.use_nodeNDP = BASKER_TRUE; // use nodeNDP to compute ND partition
66 ShyLUbasker->Options.run_nd_on_leaves = BASKER_TRUE; // run ND on the final leaf-nodes
67 ShyLUbasker->Options.run_amd_on_leaves = BASKER_FALSE; // run AMD on the final leaf-nodes
68 ShyLUbasker->Options.transpose = BASKER_FALSE;
69 ShyLUbasker->Options.replace_tiny_pivot = BASKER_FALSE;
70 ShyLUbasker->Options.verbose_matrix_out = BASKER_FALSE;
71
72 ShyLUbasker->Options.user_fill = (double)BASKER_FILL_USER;
73 ShyLUbasker->Options.use_sequential_diag_facto = BASKER_FALSE;
74#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
75 num_threads = Kokkos::OpenMP::max_hardware_threads();
76#else
77 num_threads = Kokkos::OpenMP::impl_max_hardware_threads();
78#endif
79
80#else
81 TEUCHOS_TEST_FOR_EXCEPTION(1 != 0,
82 std::runtime_error,
83 "Amesos2_ShyLUBasker Exception: Do not have supported Kokkos node type (OpenMP) enabled for ShyLUBasker");
84#endif
85}
86
87
88template <class Matrix, class Vector>
89ShyLUBasker<Matrix,Vector>::~ShyLUBasker( )
90{
91 /* ShyLUBasker will cleanup its own internal memory*/
92#if defined(HAVE_AMESOS2_KOKKOS) && defined(KOKKOS_ENABLE_OPENMP)
93 ShyLUbasker->Finalize();
94 delete ShyLUbasker;
95#endif
96}
97
98template <class Matrix, class Vector>
99bool
101 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1) && is_contiguous_);
102}
103
104template<class Matrix, class Vector>
105int
107{
108 /* TODO: Define what it means for ShyLUBasker
109 */
110#ifdef HAVE_AMESOS2_TIMERS
111 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
112#endif
113
114 return(0);
115}
116
117
118template <class Matrix, class Vector>
119int
120ShyLUBasker<Matrix,Vector>::symbolicFactorization_impl()
121{
122
123 int info = 0;
124 if(this->root_)
125 {
126 ShyLUbasker->SetThreads(num_threads);
127
128
129 // NDE: Special case
130 // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
131 // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
132 // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
133
134 if ( single_proc_optimization() ) {
135
136 host_ordinal_type_array sp_rowptr;
137 host_ordinal_type_array sp_colind;
138 // this needs to be checked during loadA_impl...
139 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
140 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
141 std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
142 this->matrixA_->returnColInd_kokkos_view(sp_colind);
143 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
144 std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
145
146 host_value_type_array hsp_values;
147 this->matrixA_->returnValues_kokkos_view(hsp_values);
148 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
149 //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
150 TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
151 std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
152
153 // In this case, colptr_, rowind_, nzvals_ are invalid
154 info = ShyLUbasker->Symbolic(this->globalNumRows_,
155 this->globalNumCols_,
156 this->globalNumNonZeros_,
157 sp_rowptr.data(),
158 sp_colind.data(),
159 sp_values,
160 true);
161
162 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
163 std::runtime_error, "Error in ShyLUBasker Symbolic");
164 }
165 else
166 { //follow original code path if conditions not met
167 // In this case, loadA_impl updates colptr_, rowind_, nzvals_
168 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
169 info = ShyLUbasker->Symbolic(this->globalNumRows_,
170 this->globalNumCols_,
171 this->globalNumNonZeros_,
172 colptr_view_.data(),
173 rowind_view_.data(),
174 sp_values);
175
176 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
177 std::runtime_error, "Error in ShyLUBasker Symbolic");
178 }
179 } // end if (this->root_)
180 /*No symbolic factoriztion*/
181
182 /* All processes should have the same error code */
183 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
184 return(info);
185}
186
187
188template <class Matrix, class Vector>
189int
191{
192 using Teuchos::as;
193
194 int info = 0;
195 if ( this->root_ ){
196 { // Do factorization
197#ifdef HAVE_AMESOS2_TIMERS
198 Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
199#endif
200
201 // NDE: Special case
202 // Rather than going through the Amesos2 machinery to convert the matrixA_ CRS pointer data to CCS and store in Teuchos::Arrays,
203 // in this special case we pass the CRS raw pointers directly to ShyLUBasker which copies+transposes+sorts the data for CCS format
204 // loadA_impl is essentially an empty function in this case, as the raw pointers are handled here and similarly in Symbolic
205
206 if ( single_proc_optimization() ) {
207
208 host_ordinal_type_array sp_rowptr;
209 host_ordinal_type_array sp_colind;
210 this->matrixA_->returnRowPtr_kokkos_view(sp_rowptr);
211 TEUCHOS_TEST_FOR_EXCEPTION(sp_rowptr.data() == nullptr,
212 std::runtime_error, "Amesos2 Runtime Error: sp_rowptr returned null ");
213 this->matrixA_->returnColInd_kokkos_view(sp_colind);
214 TEUCHOS_TEST_FOR_EXCEPTION(sp_colind.data() == nullptr,
215 std::runtime_error, "Amesos2 Runtime Error: sp_colind returned null ");
216
217 host_value_type_array hsp_values;
218 this->matrixA_->returnValues_kokkos_view(hsp_values);
219 shylubasker_dtype * sp_values = function_map::convert_scalar(hsp_values.data());
220 //shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
221
222 TEUCHOS_TEST_FOR_EXCEPTION(sp_values == nullptr,
223 std::runtime_error, "Amesos2 Runtime Error: sp_values returned null ");
224
225 // In this case, colptr_, rowind_, nzvals_ are invalid
226 info = ShyLUbasker->Factor( this->globalNumRows_,
227 this->globalNumCols_,
228 this->globalNumNonZeros_,
229 sp_rowptr.data(),
230 sp_colind.data(),
231 sp_values);
232
233 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
234 std::runtime_error, "Error ShyLUBasker Factor");
235 }
236 else
237 {
238 // In this case, loadA_impl updates colptr_, rowind_, nzvals_
239 shylubasker_dtype * sp_values = function_map::convert_scalar(nzvals_view_.data());
240 info = ShyLUbasker->Factor(this->globalNumRows_,
241 this->globalNumCols_,
242 this->globalNumNonZeros_,
243 colptr_view_.data(),
244 rowind_view_.data(),
245 sp_values);
246
247 TEUCHOS_TEST_FOR_EXCEPTION(info != 0,
248 std::runtime_error, "Error ShyLUBasker Factor");
249 }
250
251 //ShyLUbasker->DEBUG_PRINT();
252
253 local_ordinal_type blnnz = local_ordinal_type(0);
254 local_ordinal_type bunnz = local_ordinal_type(0);
255 ShyLUbasker->GetLnnz(blnnz); // Add exception handling?
256 ShyLUbasker->GetUnnz(bunnz);
257
258 // This is set after numeric factorization complete as pivoting can be used;
259 // In this case, a discrepancy between symbolic and numeric nnz total can occur.
260 this->setNnzLU( as<size_t>( blnnz + bunnz ) );
261
262 } // end scope for timer
263 } // end if (this->root_)
264
265 /* All processes should have the same error code */
266 Teuchos::broadcast(*(this->matrixA_->getComm()), 0, &info);
267
268 //global_size_type info_st = as<global_size_type>(info);
269 /* TODO : Proper error messages*/
270 TEUCHOS_TEST_FOR_EXCEPTION(info == -1,
271 std::runtime_error,
272 "ShyLUBasker: Could not alloc space for L and U");
273 TEUCHOS_TEST_FOR_EXCEPTION(info == -2,
274 std::runtime_error,
275 "ShyLUBasker: Could not alloc needed work space");
276 TEUCHOS_TEST_FOR_EXCEPTION(info == -3,
277 std::runtime_error,
278 "ShyLUBasker: Could not alloc additional memory needed for L and U");
279 TEUCHOS_TEST_FOR_EXCEPTION(info > 0,
280 std::runtime_error,
281 "ShyLUBasker: Zero pivot found at: " << info );
282
283 return(info);
284}
285
286
287template <class Matrix, class Vector>
288int
290 const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
291 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
292{
293 int ierr = 0; // returned error code
294
295 using Teuchos::as;
296
297 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
298 const size_t nrhs = X->getGlobalNumVectors();
299
300 const bool ShyluBaskerTransposeRequest = this->control_.useTranspose_;
301 const bool initialize_data = true;
302 const bool do_not_initialize_data = false;
303 bool use_gather = use_gather_; // user param
304 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
305 use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
306 {
307#ifdef HAVE_AMESOS2_TIMERS
308 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
309#endif
310 if ( single_proc_optimization() && nrhs == 1 ) {
311
312 // no msp creation
313 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
314 host_solve_array_t>::do_get(initialize_data, B, bValues_, as<size_t>(ld_rhs));
315
316 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
317 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_, as<size_t>(ld_rhs));
318
319 } // end if ( single_proc_optimization() && nrhs == 1 )
320 else {
321 if (use_gather) {
322 int rval = B->gather(bValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
323 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
324 if (rval == 0) {
325 X->gather(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
326 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
327 } else {
328 use_gather = false;
329 }
330 }
331 if (!use_gather) {
332 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
333 host_solve_array_t>::do_get(initialize_data, B, bValues_,
334 as<size_t>(ld_rhs),
335 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
336 this->rowIndexBase_);
337
338 // See Amesos2_Tacho_def.hpp for notes on why we 'get' x here.
339 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
340 host_solve_array_t>::do_get(do_not_initialize_data, X, xValues_,
341 as<size_t>(ld_rhs),
342 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
343 this->rowIndexBase_);
344 }
345 }
346 }
347
348 if ( this->root_ ) { // do solve
349#ifdef HAVE_AMESOS2_TIMERS
350 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
351#endif
352
353 shylubasker_dtype * pxValues = function_map::convert_scalar(xValues_.data());
354 shylubasker_dtype * pbValues = function_map::convert_scalar(bValues_.data());
355 if (!ShyluBaskerTransposeRequest)
356 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues);
357 else
358 ierr = ShyLUbasker->Solve(nrhs, pbValues, pxValues, true);
359 }
360 /* All processes should have the same error code */
361 Teuchos::broadcast(*(this->getComm()), 0, &ierr);
362
363 TEUCHOS_TEST_FOR_EXCEPTION( ierr > 0,
364 std::runtime_error,
365 "Encountered zero diag element at: " << ierr);
366 TEUCHOS_TEST_FOR_EXCEPTION( ierr == -1,
367 std::runtime_error,
368 "Could not alloc needed working memory for solve" );
369 {
370#ifdef HAVE_AMESOS2_TIMERS
371 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
372#endif
373 if (use_gather) {
374 int rval = X->scatter(xValues_, this->perm_g2l, this->recvCountRows, this->recvDisplRows,
375 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
376 if (rval != 0) use_gather = false;
377 }
378 if (!use_gather) {
379 Util::put_1d_data_helper_kokkos_view<
380 MultiVecAdapter<Vector>,host_solve_array_t>::do_put(X, xValues_,
381 as<size_t>(ld_rhs),
382 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED);
383 }
384 }
385 return(ierr);
386}
387
388
389template <class Matrix, class Vector>
390bool
392{
393 // The ShyLUBasker can only handle square for right now
394 return( this->globalNumRows_ == this->globalNumCols_ );
395}
396
397
398template <class Matrix, class Vector>
399void
400ShyLUBasker<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
401{
402 using Teuchos::RCP;
403 using Teuchos::getIntegralValue;
404 using Teuchos::ParameterEntryValidator;
405
406 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
407
408 if(parameterList->isParameter("IsContiguous"))
409 {
410 is_contiguous_ = parameterList->get<bool>("IsContiguous");
411 }
412
413 if(parameterList->isParameter("UseCustomGather"))
414 {
415 use_gather_ = parameterList->get<bool>("UseCustomGather");
416 }
417
418 if(parameterList->isParameter("num_threads"))
419 {
420 num_threads = parameterList->get<int>("num_threads");
421 }
422 if(parameterList->isParameter("pivot"))
423 {
424 ShyLUbasker->Options.no_pivot = (!parameterList->get<bool>("pivot"));
425 }
426 if(parameterList->isParameter("delayed pivot"))
427 {
428 ShyLUbasker->Options.static_delayed_pivot = (parameterList->get<int>("delayed pivot"));
429 }
430 if(parameterList->isParameter("pivot_tol"))
431 {
432 ShyLUbasker->Options.pivot_tol = parameterList->get<double>("pivot_tol");
433 }
434 if(parameterList->isParameter("symmetric"))
435 {
436 ShyLUbasker->Options.symmetric = parameterList->get<bool>("symmetric");
437 }
438 if(parameterList->isParameter("realloc"))
439 {
440 ShyLUbasker->Options.realloc = parameterList->get<bool>("realloc");
441 }
442 if(parameterList->isParameter("verbose"))
443 {
444 ShyLUbasker->Options.verbose = parameterList->get<bool>("verbose");
445 }
446 if(parameterList->isParameter("verbose_matrix"))
447 {
448 ShyLUbasker->Options.verbose_matrix_out = parameterList->get<bool>("verbose_matrix");
449 }
450 if(parameterList->isParameter("btf"))
451 {
452 ShyLUbasker->Options.btf = parameterList->get<bool>("btf");
453 }
454 if(parameterList->isParameter("use_metis"))
455 {
456 ShyLUbasker->Options.use_metis = parameterList->get<bool>("use_metis");
457 }
458 if(parameterList->isParameter("use_nodeNDP"))
459 {
460 ShyLUbasker->Options.use_nodeNDP = parameterList->get<bool>("use_nodeNDP");
461 }
462 if(parameterList->isParameter("run_nd_on_leaves"))
463 {
464 ShyLUbasker->Options.run_nd_on_leaves = parameterList->get<bool>("run_nd_on_leaves");
465 }
466 if(parameterList->isParameter("run_amd_on_leaves"))
467 {
468 ShyLUbasker->Options.run_amd_on_leaves = parameterList->get<bool>("run_amd_on_leaves");
469 }
470 if(parameterList->isParameter("amd_on_blocks"))
471 {
472 ShyLUbasker->Options.amd_dom = parameterList->get<bool>("amd_on_blocks");
473 }
474 if(parameterList->isParameter("transpose"))
475 {
476 // NDE: set transpose vs non-transpose mode as bool; track separate shylubasker objects
477 const auto transpose = parameterList->get<bool>("transpose");
478 if (transpose == true)
479 this->control_.useTranspose_ = true;
480 }
481 if(parameterList->isParameter("use_sequential_diag_facto"))
482 {
483 ShyLUbasker->Options.use_sequential_diag_facto = parameterList->get<bool>("use_sequential_diag_facto");
484 }
485 if(parameterList->isParameter("user_fill"))
486 {
487 ShyLUbasker->Options.user_fill = parameterList->get<double>("user_fill");
488 }
489 if(parameterList->isParameter("prune"))
490 {
491 ShyLUbasker->Options.prune = parameterList->get<bool>("prune");
492 }
493 if(parameterList->isParameter("replace_tiny_pivot"))
494 {
495 ShyLUbasker->Options.replace_tiny_pivot = parameterList->get<bool>("replace_tiny_pivot");
496 }
497 if(parameterList->isParameter("btf_matching"))
498 {
499 ShyLUbasker->Options.btf_matching = parameterList->get<int>("btf_matching");
500 if (ShyLUbasker->Options.btf_matching == 1 || ShyLUbasker->Options.btf_matching == 2) {
501 ShyLUbasker->Options.matching = true;
502 } else {
503 ShyLUbasker->Options.matching = false;
504 }
505 }
506 if(parameterList->isParameter("blk_matching"))
507 {
508 ShyLUbasker->Options.blk_matching = parameterList->get<int>("blk_matching");
509 }
510 if(parameterList->isParameter("matrix_scaling"))
511 {
512 ShyLUbasker->Options.matrix_scaling = parameterList->get<int>("matrix_scaling");
513 }
514 if(parameterList->isParameter("min_block_size"))
515 {
516 ShyLUbasker->Options.min_block_size = parameterList->get<int>("min_block_size");
517 }
518}
519
520template <class Matrix, class Vector>
521Teuchos::RCP<const Teuchos::ParameterList>
523{
524 using Teuchos::ParameterList;
525
526 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
527
528 if( is_null(valid_params) )
529 {
530 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
531 pl->set("IsContiguous", true,
532 "Are GIDs contiguous");
533 pl->set("UseCustomGather", true,
534 "Use Matrix-gather routine");
535 pl->set("num_threads", 1,
536 "Number of threads");
537 pl->set("pivot", false,
538 "Should not pivot");
539 pl->set("delayed pivot", 0,
540 "Apply static delayed pivot on a big block");
541 pl->set("pivot_tol", .0001,
542 "Tolerance before pivot, currently not used");
543 pl->set("symmetric", false,
544 "Should Symbolic assume symmetric nonzero pattern");
545 pl->set("realloc" , false,
546 "Should realloc space if not enough");
547 pl->set("verbose", false,
548 "Information about factoring");
549 pl->set("verbose_matrix", false,
550 "Give Permuted Matrices");
551 pl->set("btf", true,
552 "Use BTF ordering");
553 pl->set("prune", false,
554 "Use prune on BTF blocks (Not Supported)");
555 pl->set("btf_matching", 2,
556 "Matching option for BTF: 0 = none, 1 = Basker, 2 = Trilinos (default), (3 = MC64 if enabled)");
557 pl->set("blk_matching", 1,
558 "Matching optioon for block: 0 = none, 1 or anything else = Basker (default), (2 = MC64 if enabled)");
559 pl->set("matrix_scaling", 0,
560 "Use matrix scaling to biig A BTF block: 0 = no-scaling, 1 = symmetric diagonal scaling, 2 = row-max, and then col-max scaling");
561 pl->set("min_block_size", 0,
562 "Size of the minimum diagonal blocks");
563 pl->set("replace_tiny_pivot", false,
564 "Replace tiny pivots during the numerical factorization");
565 pl->set("use_metis", true,
566 "Use METIS for ND");
567 pl->set("use_nodeNDP", true,
568 "Use nodeND to compute ND partition");
569 pl->set("run_nd_on_leaves", false,
570 "Run ND on the final leaf-nodes for ND factorization");
571 pl->set("run_amd_on_leaves", false,
572 "Run AMD on the final leaf-nodes for ND factorization");
573 pl->set("amd_on_blocks", true,
574 "Run AMD on each diagonal blocks");
575 pl->set("transpose", false,
576 "Solve the transpose A");
577 pl->set("use_sequential_diag_facto", false,
578 "Use sequential algorithm to factor each diagonal block");
579 pl->set("user_fill", (double)BASKER_FILL_USER,
580 "User-provided padding for the fill ratio");
581 valid_params = pl;
582 }
583 return valid_params;
584}
585
586
587template <class Matrix, class Vector>
588bool
590{
591 using Teuchos::as;
592 if(current_phase == SOLVE || current_phase == PREORDERING ) return( false );
593
594 #ifdef HAVE_AMESOS2_TIMERS
595 Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
596 #endif
597
598
599 // NDE: Can clean up duplicated code with the #ifdef guards
600 if ( single_proc_optimization() ) {
601 // NDE: Nothing is done in this special case - CRS raw pointers are passed to SHYLUBASKER and transpose of copies handled there
602 // In this case, colptr_, rowind_, nzvals_ are invalid
603 }
604 else
605 {
606
607 // Only the root image needs storage allocated
608 if( this->root_ && current_phase == SYMBFACT )
609 {
610 Kokkos::resize(nzvals_view_, this->globalNumNonZeros_);
611 Kokkos::resize(rowind_view_, this->globalNumNonZeros_);
612 Kokkos::resize(colptr_view_, this->globalNumCols_ + 1); //this will be wrong for case of gapped col ids, e.g. 0,2,4,9; num_cols = 10 ([0,10)) but num GIDs = 4...
613 }
614
615 local_ordinal_type nnz_ret = -1;
616 bool use_gather = use_gather_; // user param
617 use_gather = (use_gather && this->matrixA_->getComm()->getSize() > 1); // only with multiple MPIs
618 use_gather = (use_gather && (std::is_same<scalar_type, float>::value || std::is_same<scalar_type, double>::value)); // only for double or float
619 {
620 #ifdef HAVE_AMESOS2_TIMERS
621 Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
622 #endif
623 if (use_gather) {
624 bool column_major = true;
625 if (!is_contiguous_) {
626 auto contig_mat = this->matrixA_->reindex(this->contig_rowmap_, this->contig_colmap_, current_phase);
627 nnz_ret = contig_mat->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
628 this->transpose_map, this->nzvals_t, column_major, current_phase);
629 } else {
630 nnz_ret = this->matrixA_->gather(nzvals_view_, rowind_view_, colptr_view_, this->perm_g2l, this->recvCountRows, this->recvDisplRows, this->recvCounts, this->recvDispls,
631 this->transpose_map, this->nzvals_t, column_major, current_phase);
632 }
633 // gather failed (e.g., not implemened for KokkosCrsMatrix)
634 // in case of the failure, it falls back to the original "do_get"
635 if (nnz_ret < 0) use_gather = false;
636 }
637 if (!use_gather) {
639 MatrixAdapter<Matrix>, host_value_type_array, host_ordinal_type_array, host_ordinal_type_array>
640 ::do_get(this->matrixA_.ptr(), nzvals_view_, rowind_view_, colptr_view_, nnz_ret,
641 (is_contiguous_ == true) ? ROOTED : CONTIGUOUS_AND_ROOTED,
642 ARBITRARY,
643 this->rowIndexBase_); // copies from matrixA_ to ShyLUBasker ConcreteSolver cp, ri, nzval members
644 }
645 }
646
647 // gather return the total nnz_ret on every MPI process
648 if (use_gather || this->root_) {
649 TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != as<local_ordinal_type>(this->globalNumNonZeros_),
650 std::runtime_error,
651 "Amesos2_ShyLUBasker loadA_impl: Did not get the expected number of non-zero vals("
652 +std::to_string(nnz_ret)+" vs "+std::to_string(this->globalNumNonZeros_)+")");
653 }
654 } //end alternative path
655 return true;
656}
657
658
659template<class Matrix, class Vector>
660const char* ShyLUBasker<Matrix,Vector>::name = "ShyLUBasker";
661
662
663} // end namespace Amesos2
664
665#endif // AMESOS2_SHYLUBASKER_DEF_HPP
Amesos2 ShyLUBasker declarations.
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
@ CONTIGUOUS_AND_ROOTED
Definition Amesos2_TypeDecl.hpp:94
@ ARBITRARY
Definition Amesos2_TypeDecl.hpp:109
A Matrix adapter interface for Amesos2.
Definition Amesos2_MatrixAdapter_decl.hpp:42
Amesos2 interface to the Baker package.
Definition Amesos2_ShyLUBasker_decl.hpp:43
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition Amesos2_ShyLUBasker_def.hpp:589
host_ordinal_type_array colptr_view_
Stores the row indices of the nonzero entries.
Definition Amesos2_ShyLUBasker_decl.hpp:169
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
ShyLUBasker specific solve.
Definition Amesos2_ShyLUBasker_def.hpp:289
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_ShyLUBasker_def.hpp:522
int numericFactorization_impl()
ShyLUBasker specific numeric factorization.
Definition Amesos2_ShyLUBasker_def.hpp:190
host_value_type_array nzvals_view_
Stores the values of the nonzero entries for Umfpack.
Definition Amesos2_ShyLUBasker_decl.hpp:165
static const char * name
Name of this solver interface.
Definition Amesos2_ShyLUBasker_decl.hpp:50
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_ShyLUBasker_def.hpp:106
bool single_proc_optimization() const
can we optimize size_type and ordinal_type for straight pass through, also check that is_contiguous_ ...
Definition Amesos2_ShyLUBasker_def.hpp:100
host_solve_array_t xValues_
Persisting 1D store for X.
Definition Amesos2_ShyLUBasker_decl.hpp:179
host_ordinal_type_array rowind_view_
Stores the location in Ai_ and Aval_ that starts row j.
Definition Amesos2_ShyLUBasker_decl.hpp:167
host_solve_array_t bValues_
Persisting 1D store for B.
Definition Amesos2_ShyLUBasker_decl.hpp:183
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_ShyLUBasker_def.hpp:391
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers.
Definition Amesos2_SolverCore_decl.hpp:72
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
Definition Amesos2_SolverCore_decl.hpp:421
bool root_
Definition Amesos2_SolverCore_decl.hpp:472
void setNnzLU(size_t nnz)
Definition Amesos2_SolverCore_decl.hpp:418
global_size_type rowIndexBase_
Definition Amesos2_SolverCore_decl.hpp:451
global_size_type globalNumCols_
Definition Amesos2_SolverCore_decl.hpp:445
Timers timers_
Definition Amesos2_SolverCore_decl.hpp:463
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Definition Amesos2_SolverCore_decl.hpp:329
global_size_type globalNumNonZeros_
Definition Amesos2_SolverCore_decl.hpp:448
global_size_type globalNumRows_
Definition Amesos2_SolverCore_decl.hpp:442
Control control_
Definition Amesos2_SolverCore_decl.hpp:460
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
void transpose(ArrayView< Scalar > vals, ArrayView< GlobalOrdinal > indices, ArrayView< GlobalSizeT > ptr, ArrayView< Scalar > trans_vals, ArrayView< GlobalOrdinal > trans_indices, ArrayView< GlobalSizeT > trans_ptr)
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142
A generic helper class for getting a CCS representation of a Matrix.
Definition Amesos2_Util.hpp:615