Amesos2 - Direct Sparse Solver Interfaces Version of the Day
Amesos2_cuSOLVER_def.hpp
1// @HEADER
2// *****************************************************************************
3// Amesos2: Templated Direct Sparse Solver Package
4//
5// Copyright 2011 NTESS and the Amesos2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef AMESOS2_CUSOLVER_DEF_HPP
11#define AMESOS2_CUSOLVER_DEF_HPP
12
13#include <Teuchos_Tuple.hpp>
14#include <Teuchos_ParameterList.hpp>
15#include <Teuchos_StandardParameterEntryValidators.hpp>
16
18#include "Amesos2_cuSOLVER_decl.hpp"
19
20namespace Amesos2 {
21
22template <class Matrix, class Vector>
24 Teuchos::RCP<const Matrix> A,
25 Teuchos::RCP<Vector> X,
26 Teuchos::RCP<const Vector> B )
27 : SolverCore<Amesos2::cuSOLVER,Matrix,Vector>(A, X, B)
28{
29 auto status = cusolverSpCreate(&data_.handle);
30 TEUCHOS_TEST_FOR_EXCEPTION( status != CUSOLVER_STATUS_SUCCESS,
31 std::runtime_error, "cusolverSpCreate failed");
32
33 status = cusolverSpCreateCsrcholInfo(&data_.chol_info);
34 TEUCHOS_TEST_FOR_EXCEPTION( status != CUSOLVER_STATUS_SUCCESS,
35 std::runtime_error, "cusolverSpCreateCsrcholInfo failed");
36
37 auto sparse_status = cusparseCreateMatDescr(&data_.desc);
38 TEUCHOS_TEST_FOR_EXCEPTION( sparse_status != CUSPARSE_STATUS_SUCCESS,
39 std::runtime_error, "cusparseCreateMatDescr failed");
40}
41
42template <class Matrix, class Vector>
44{
45 cusparseDestroyMatDescr(data_.desc);
46 cusolverSpDestroyCsrcholInfo(data_.chol_info);
47 cusolverSpDestroy(data_.handle);
48}
49
50template<class Matrix, class Vector>
51int
53{
54#ifdef HAVE_AMESOS2_TIMERS
55 Teuchos::TimeMonitor preOrderTimer(this->timers_.preOrderTime_);
56#endif
57 if(do_optimization()) {
58 this->matrixA_->returnRowPtr_kokkos_view(device_row_ptr_view_);
59 this->matrixA_->returnColInd_kokkos_view(device_cols_view_);
60
61 // reorder to optimize cuSolver
62 if(data_.bReorder) {
63 Amesos2::Util::reorder(
64 device_row_ptr_view_, device_cols_view_,
65 device_perm_, device_peri_, sorted_nnz,
66 true);
67 }
68 }
69
70 return(0);
71}
72
73template <class Matrix, class Vector>
74int
76{
77#ifdef HAVE_AMESOS2_TIMERS
78 Teuchos::TimeMonitor symFactTimer(this->timers_.symFactTime_);
79#endif
80
81 int err = 0;
82 if ( this->root_ ) {
83 const int size = this->globalNumRows_;
84 const int nnz = device_cols_view_.size(); // reorder may have changed this
85 const int * colIdx = device_cols_view_.data();
86 const int * rowPtr = device_row_ptr_view_.data();
87 auto status = cusolverSpXcsrcholAnalysis(
88 data_.handle, size, nnz, data_.desc, rowPtr, colIdx, data_.chol_info);
89 err = (status != CUSOLVER_STATUS_SUCCESS) ? 1 : 0;
90 }
91
92 Teuchos::broadcast(*(this->getComm()), 0, &err);
93 TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
94 std::runtime_error, "Amesos2 cuSolver symbolic failed.");
95
96 return err;
97}
98
99template <class Matrix, class Vector>
100int
102{
103 int err = 0;
104 if(do_optimization()) { // just supporting one rank right now
105 this->matrixA_->returnValues_kokkos_view(device_nzvals_view_);
106
107 // reorder to optimize cuSolver
108 if(data_.bReorder) {
109 // must have original row and cols - maybe cache this from 1st symbiolic setup
110 // this setup exists to support the refactor option
111 device_size_type_array orig_device_row_ptr_view;
112 device_ordinal_type_array orig_device_cols_view;
113 this->matrixA_->returnRowPtr_kokkos_view(orig_device_row_ptr_view);
114 this->matrixA_->returnColInd_kokkos_view(orig_device_cols_view);
115 Amesos2::Util::reorder_values(
116 device_nzvals_view_, orig_device_row_ptr_view, device_row_ptr_view_, orig_device_cols_view,
117 device_perm_, device_peri_, sorted_nnz);
118 }
119
120 const int size = this->globalNumRows_;
121 const int nnz = device_cols_view_.size(); // reorder may have changed this
122 const cusolver_type * values = device_nzvals_view_.data();
123 const int * colIdx = device_cols_view_.data();
124 const int * rowPtr = device_row_ptr_view_.data();
125
126 size_t internalDataInBytes, workspaceInBytes;
127 auto status = function_map::bufferInfo(data_.handle, size, nnz, data_.desc,
128 values, rowPtr, colIdx, data_.chol_info,
129 &internalDataInBytes, &workspaceInBytes);
130
131 if(status == CUSOLVER_STATUS_SUCCESS) {
132 const size_t buffer_size = workspaceInBytes / sizeof(cusolver_type);
133 if(buffer_size > buffer_.extent(0)) {
134 buffer_ = device_value_type_array(
135 Kokkos::ViewAllocateWithoutInitializing("cusolver buf"), buffer_size);
136 }
137 status = function_map::numeric(data_.handle, size, nnz, data_.desc,
138 values, rowPtr, colIdx, data_.chol_info, buffer_.data());
139 }
140 err = (status != CUSOLVER_STATUS_SUCCESS) ? 1 : 0;
141 }
142
143 Teuchos::broadcast(*(this->getComm()), 0, &err);
144 TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
145 std::runtime_error, "Amesos2 cuSolver numeric failed.");
146
147 return err;
148}
149
150template <class Matrix, class Vector>
151int
153 const Teuchos::Ptr<MultiVecAdapter<Vector> > X,
154 const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
155{
156 const global_size_type ld_rhs = this->root_ ? X->getGlobalLength() : 0;
157 const ordinal_type nrhs = X->getGlobalNumVectors();
158
159 bool bAssignedX;
160 { // Get values from RHS B
161#ifdef HAVE_AMESOS2_TIMERS
162 Teuchos::TimeMonitor mvConvTimer(this->timers_.vecConvTime_);
163 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
164#endif
165
166 const bool initialize_data = true;
167 const bool do_not_initialize_data = false;
168 Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
169 device_solve_array_t>::do_get(initialize_data, B, this->bValues_, Teuchos::as<size_t>(ld_rhs),
170 ROOTED, this->rowIndexBase_);
171
172 // In general we may want to write directly to the x space without a copy.
173 // So we 'get' x which may be a direct view assignment to the MV.
174 bAssignedX = Util::get_1d_copy_helper_kokkos_view<MultiVecAdapter<Vector>,
175 device_solve_array_t>::do_get(do_not_initialize_data, X, this->xValues_, Teuchos::as<size_t>(ld_rhs),
176 ROOTED, this->rowIndexBase_);
177 }
178
179 int err = 0;
180
181 if ( this->root_ ) { // Do solve!
182#ifdef HAVE_AMESOS2_TIMER
183 Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
184#endif
185
186 const int size = this->globalNumRows_;
187
188 if(data_.bReorder) {
189 Amesos2::Util::apply_reorder_permutation(
190 this->bValues_, this->permute_result_, this->device_perm_);
191 }
192 else {
193 this->permute_result_ = this->bValues_; // no permutation
194 }
195
196 for(ordinal_type n = 0; n < nrhs; ++n) {
197 const cusolver_type * b = this->permute_result_.data() + n * size;
198 cusolver_type * x = this->xValues_.data() + n * size;
199 auto status = function_map::solve(
200 data_.handle, size, b, x, data_.chol_info, buffer_.data());
201 err = (status != CUSOLVER_STATUS_SUCCESS) ? 1 : 0;
202 if(err != 0) {
203 break;
204 }
205 }
206
207 if(data_.bReorder && err == 0) {
208 Amesos2::Util::apply_reorder_permutation(
209 this->xValues_, this->permute_result_, this->device_peri_);
210 Kokkos::deep_copy(this->xValues_, this->permute_result_); // full copy since permute_result_ is reused
211 }
212 }
213
214 /* Update X's global values */
215
216 // if bDidAssignX, then we solved straight to the adapter's X memory space without
217 // requiring additional memory allocation, so the x data is already in place.
218 if(!bAssignedX) {
219#ifdef HAVE_AMESOS2_TIMERS
220 Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
221#endif
222
223 Util::template put_1d_data_helper_kokkos_view<
224 MultiVecAdapter<Vector>,device_solve_array_t>::do_put(X, xValues_,
225 Teuchos::as<size_t>(ld_rhs), ROOTED, this->rowIndexBase_);
226 }
227
228 Teuchos::broadcast(*(this->getComm()), 0, &err);
229 TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
230 std::runtime_error, "Amesos2 cuSolver solve failed.");
231
232 return err;
233}
234
235template <class Matrix, class Vector>
236bool
238{
239 return( this->matrixA_->getGlobalNumRows() == this->matrixA_->getGlobalNumCols() );
240}
241
242template <class Matrix, class Vector>
243void
244cuSOLVER<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
245{
246 using Teuchos::RCP;
247 using Teuchos::ParameterEntryValidator;
248
249 RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
250
251 if( parameterList->isParameter("Reorder") ){
252 RCP<const ParameterEntryValidator> reorder_validator = valid_params->getEntry("Reorder").validator();
253 parameterList->getEntry("Reorder").setValidator(reorder_validator);
254 }
255
256 data_.bReorder = parameterList->get<bool>("Reorder", true);
257}
258
259template <class Matrix, class Vector>
260Teuchos::RCP<const Teuchos::ParameterList>
262{
263 static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
264
265 if( is_null(valid_params) ){
266 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
267
268 pl->set("Reorder", true, "Whether GIDs contiguous");
269
270 valid_params = pl;
271 }
272
273 return valid_params;
274}
275
276template <class Matrix, class Vector>
277bool
279 return (this->root_ && (this->matrixA_->getComm()->getSize() == 1));
280}
281
282template <class Matrix, class Vector>
283bool
285{
286 if(current_phase == SOLVE) {
287 return(false);
288 }
289
290 if(!do_optimization()) { // we're only doing serial right now for cuSolver
291 TEUCHOS_TEST_FOR_EXCEPTION( true, std::runtime_error,
292 "cuSolver is only implemented for serial.");
293 }
294
295 return true;
296}
297
298template<class Matrix, class Vector>
299const char* cuSOLVER<Matrix,Vector>::name = "cuSOLVER";
300
301} // end namespace Amesos2
302
303#endif // AMESOS2_CUSOLVER_DEF_HPP
@ ROOTED
Definition Amesos2_TypeDecl.hpp:93
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
Definition Amesos2_SolverCore_decl.hpp:421
bool root_
Definition Amesos2_SolverCore_decl.hpp:472
SolverCore(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
global_size_type rowIndexBase_
Definition Amesos2_SolverCore_decl.hpp:451
Timers timers_
Definition Amesos2_SolverCore_decl.hpp:463
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Definition Amesos2_SolverCore_decl.hpp:329
global_size_type globalNumRows_
Definition Amesos2_SolverCore_decl.hpp:442
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition Amesos2_cuSOLVER_def.hpp:244
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition Amesos2_cuSOLVER_def.hpp:52
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal structures.
Definition Amesos2_cuSOLVER_def.hpp:284
cuSOLVER(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition Amesos2_cuSOLVER_def.hpp:23
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition Amesos2_cuSOLVER_def.hpp:237
static const char * name
Name of this solver interface.
Definition Amesos2_cuSOLVER_decl.hpp:32
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
cuSOLVER specific solve.
Definition Amesos2_cuSOLVER_def.hpp:152
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition Amesos2_cuSOLVER_def.hpp:261
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using cuSOLVER.
Definition Amesos2_cuSOLVER_def.hpp:75
bool do_optimization() const
can we optimize size_type and ordinal_type for straight pass through
Definition Amesos2_cuSOLVER_def.hpp:278
int numericFactorization_impl()
cuSOLVER specific numeric factorization
Definition Amesos2_cuSOLVER_def.hpp:101
~cuSOLVER()
Destructor.
Definition Amesos2_cuSOLVER_def.hpp:43
EPhase
Used to indicate a phase in the direct solution.
Definition Amesos2_TypeDecl.hpp:31
const int size
Definition klu2_simple.cpp:50
A templated MultiVector class adapter for Amesos2.
Definition Amesos2_MultiVecAdapter_decl.hpp:142