Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockView.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_BLOCKVIEW_HPP
11#define TPETRA_BLOCKVIEW_HPP
12
20
21#include "TpetraCore_config.h"
22#include "Kokkos_ArithTraits.hpp"
23#include "Kokkos_Complex.hpp"
24
25namespace Tpetra {
26
31
32namespace Impl {
33
40template<class ViewType1,
41 class ViewType2,
42 const int rank1 = ViewType1::rank>
43struct AbsMax {
44 static KOKKOS_INLINE_FUNCTION void
45 run (const ViewType2& Y, const ViewType1& X);
46};
47
52template<class ViewType1,
53 class ViewType2>
54struct AbsMax<ViewType1, ViewType2, 2> {
57 static KOKKOS_INLINE_FUNCTION void
58 run (const ViewType2& Y, const ViewType1& X)
59 {
60 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
61 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
62 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
63 static_assert(! std::is_const<STY>::value,
64 "AbsMax: The type of each entry of Y must be nonconst.");
65 typedef typename std::decay<decltype (X(0,0)) >::type STX;
66 static_assert( std::is_same<STX, STY>::value,
67 "AbsMax: The type of each entry of X and Y must be the same.");
68 typedef Kokkos::ArithTraits<STY> KAT;
69
70 const int numCols = Y.extent (1);
71 const int numRows = Y.extent (0);
72 for (int j = 0; j < numCols; ++j) {
73 for (int i = 0; i < numRows; ++i) {
74 STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
75 const STX X_ij = X(i,j);
76 // NOTE: no std::max (not a CUDA __device__ function); must
77 // cast back up to complex.
78 const auto Y_ij_abs = KAT::abs (Y_ij);
79 const auto X_ij_abs = KAT::abs (X_ij);
80 Y_ij = (Y_ij_abs >= X_ij_abs) ?
81 static_cast<STY> (Y_ij_abs) :
82 static_cast<STY> (X_ij_abs);
83 }
84 }
85 }
86};
87
92template<class ViewType1,
93 class ViewType2>
94struct AbsMax<ViewType1, ViewType2, 1> {
97 static KOKKOS_INLINE_FUNCTION void
98 run (const ViewType2& Y, const ViewType1& X)
99 {
100 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
101 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
102
103 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
104 static_assert(! std::is_const<STY>::value,
105 "AbsMax: The type of each entry of Y must be nonconst.");
106 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
107 static_assert( std::is_same<STX, STY>::value,
108 "AbsMax: The type of each entry of X and Y must be the same.");
109 typedef Kokkos::ArithTraits<STY> KAT;
110
111 const int numRows = Y.extent (0);
112 for (int i = 0; i < numRows; ++i) {
113 STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
114 const STX X_i = X(i);
115 // NOTE: no std::max (not a CUDA __device__ function); must
116 // cast back up to complex.
117 const auto Y_i_abs = KAT::abs (Y_i);
118 const auto X_i_abs = KAT::abs (X_i);
119 Y_i = (Y_i_abs >= X_i_abs) ?
120 static_cast<STY> (Y_i_abs) :
121 static_cast<STY> (X_i_abs);
122 }
123 }
124};
125
132template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
133KOKKOS_INLINE_FUNCTION void
134absMax (const ViewType2& Y, const ViewType1& X)
135{
136 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
137 "absMax: ViewType1 and ViewType2 must have the same rank.");
138 AbsMax<ViewType1, ViewType2, rank>::run (Y, X);
139}
140
145template<class ViewType,
146 class CoefficientType,
147 class IndexType = int,
148 const bool is_contiguous = false,
149 const int rank = ViewType::rank>
150struct SCAL {
151 static KOKKOS_INLINE_FUNCTION void
152 run (const CoefficientType& alpha, const ViewType& x);
153};
154
157template<class ViewType,
158 class CoefficientType,
159 class IndexType>
160struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
162 static KOKKOS_INLINE_FUNCTION void
163 run (const CoefficientType& alpha, const ViewType& x)
164 {
165 const IndexType numRows = static_cast<IndexType> (x.extent (0));
166
168#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
169#pragma unroll
170#endif
171 for (IndexType i = 0; i < numRows; ++i)
172 x(i) = alpha * x(i);
173 }
174};
175
177template<class ViewType,
178 class CoefficientType,
179 class IndexType>
180struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
182 static KOKKOS_INLINE_FUNCTION void
183 run (const CoefficientType& alpha, const ViewType& A)
184 {
185 const IndexType numRows = static_cast<IndexType> (A.extent (0));
186 const IndexType numCols = static_cast<IndexType> (A.extent (1));
187
188 for (IndexType j = 0; j < numCols; ++j)
189 for (IndexType i = 0; i < numRows; ++i)
190 A(i,j) = alpha * A(i,j);
191 }
192};
193template<class ViewType,
194 class CoefficientType,
195 class IndexType,
196 const int rank>
197struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
199 static KOKKOS_INLINE_FUNCTION void
200 run (const CoefficientType& alpha, const ViewType& x)
201 {
202 using x_value_type = typename std::decay<decltype (*x.data()) >::type;
203 const IndexType span = static_cast<IndexType> (x.span());
204 x_value_type *__restrict x_ptr(x.data());
205#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
206#pragma unroll
207#endif
208 for (IndexType i = 0; i < span; ++i)
209 x_ptr[i] = alpha * x_ptr[i];
210 }
211};
212
217template<class ViewType,
218 class InputType,
219 class IndexType = int,
220 const bool is_contiguous = false,
221 const int rank = ViewType::rank>
222struct FILL {
223 static KOKKOS_INLINE_FUNCTION void
224 run (const ViewType& x, const InputType& val);
225};
226
229template<class ViewType,
230 class InputType,
231 class IndexType>
232struct FILL<ViewType, InputType, IndexType, false, 1> {
233 static KOKKOS_INLINE_FUNCTION void
234 run (const ViewType& x, const InputType& val)
235 {
236 const IndexType numRows = static_cast<IndexType> (x.extent (0));
237 for (IndexType i = 0; i < numRows; ++i)
238 x(i) = val;
239 }
240};
241
243template<class ViewType,
244 class InputType,
245 class IndexType>
246struct FILL<ViewType, InputType, IndexType, false, 2> {
247 static KOKKOS_INLINE_FUNCTION void
248 run (const ViewType& X, const InputType& val)
249 {
250 const IndexType numRows = static_cast<IndexType> (X.extent (0));
251 const IndexType numCols = static_cast<IndexType> (X.extent (1));
252 for (IndexType j = 0; j < numCols; ++j)
253 for (IndexType i = 0; i < numRows; ++i)
254 X(i,j) = val;
255 }
256};
257template<class ViewType,
258 class InputType,
259 class IndexType,
260 const int rank>
261struct FILL<ViewType, InputType, IndexType, true, rank> {
262 static KOKKOS_INLINE_FUNCTION void
263 run (const ViewType& x, const InputType& val)
264 {
265 const IndexType span = static_cast<IndexType> (x.span());
266 auto x_ptr = x.data();
267#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
268#pragma unroll
269#endif
270 for (IndexType i = 0; i < span; ++i)
271 x_ptr[i] = val;
272 }
273};
274
275
280template<class CoefficientType,
281 class ViewType1,
282 class ViewType2,
283 class IndexType = int,
284 const bool is_contiguous = false,
285 const int rank = ViewType1::rank>
286struct AXPY {
287 static KOKKOS_INLINE_FUNCTION void
288 run (const CoefficientType& alpha,
289 const ViewType1& x,
290 const ViewType2& y);
291};
292
295template<class CoefficientType,
296 class ViewType1,
297 class ViewType2,
298 class IndexType>
299struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
301 static KOKKOS_INLINE_FUNCTION void
302 run (const CoefficientType& alpha,
303 const ViewType1& x,
304 const ViewType2& y)
305 {
306 using Kokkos::ArithTraits;
307 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
308 "AXPY: x and y must have the same rank.");
309
310 const IndexType numRows = static_cast<IndexType> (y.extent (0));
311 if (alpha != ArithTraits<CoefficientType>::zero ()) {
313 for (IndexType i = 0; i < numRows; ++i)
314 y(i) += alpha * x(i);
315 }
316 }
317};
318
321template<class CoefficientType,
322 class ViewType1,
323 class ViewType2,
324 class IndexType>
325struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
327 static KOKKOS_INLINE_FUNCTION void
328 run (const CoefficientType& alpha,
329 const ViewType1& X,
330 const ViewType2& Y)
331 {
332 using Kokkos::ArithTraits;
333 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
334 "AXPY: X and Y must have the same rank.");
335 const IndexType numRows = static_cast<IndexType> (Y.extent (0));
336 const IndexType numCols = static_cast<IndexType> (Y.extent (1));
337
338 if (alpha != ArithTraits<CoefficientType>::zero ()) {
339 for (IndexType j = 0; j < numCols; ++j)
340 for (IndexType i = 0; i < numRows; ++i)
341 Y(i,j) += alpha * X(i,j);
342 }
343 }
344};
345
346template<class CoefficientType,
347 class ViewType1,
348 class ViewType2,
349 class IndexType,
350 const int rank>
351struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
353 static KOKKOS_INLINE_FUNCTION void
354 run (const CoefficientType& alpha,
355 const ViewType1& x,
356 const ViewType2& y)
357 {
358 using Kokkos::ArithTraits;
359 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
360 "AXPY: x and y must have the same rank.");
361
362 if (alpha != ArithTraits<CoefficientType>::zero ()) {
363 using x_value_type = typename std::decay<decltype (*x.data()) >::type;
364 using y_value_type = typename std::decay<decltype (*y.data()) >::type;
365 const IndexType span = static_cast<IndexType> (y.span());
366 const x_value_type *__restrict x_ptr(x.data());
367 y_value_type *__restrict y_ptr(y.data());
368#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
369#pragma unroll
370#endif
371 for (IndexType i = 0; i < span; ++i)
372 y_ptr[i] += alpha * x_ptr[i];
373 }
374 }
375};
376
381template<class ViewType1,
382 class ViewType2,
383 class IndexType = int,
384 const bool is_contiguous = false,
385 const int rank = ViewType1::rank>
386struct COPY {
387 static KOKKOS_INLINE_FUNCTION void
388 run (const ViewType1& x, const ViewType2& y);
389};
390
393template<class ViewType1,
394 class ViewType2,
395 class IndexType>
396struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
398 static KOKKOS_INLINE_FUNCTION void
399 run (const ViewType1& x, const ViewType2& y)
400 {
401 const IndexType numRows = static_cast<IndexType> (x.extent (0));
403 for (IndexType i = 0; i < numRows; ++i)
404 y(i) = x(i);
405 }
406};
407
410template<class ViewType1,
411 class ViewType2,
412 class IndexType>
413struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
415 static KOKKOS_INLINE_FUNCTION void
416 run (const ViewType1& X, const ViewType2& Y)
417 {
418 const IndexType numRows = static_cast<IndexType> (Y.extent (0));
419 const IndexType numCols = static_cast<IndexType> (Y.extent (1));
421 for (IndexType j = 0; j < numCols; ++j)
422 for (IndexType i = 0; i < numRows; ++i)
423 Y(i,j) = X(i,j);
424 }
425};
426
427template<class ViewType1,
428 class ViewType2,
429 class IndexType,
430 const int rank>
431struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
433 static KOKKOS_INLINE_FUNCTION void
434 run (const ViewType1& x, const ViewType2& y)
435 {
436 const IndexType span = static_cast<IndexType> (x.span());
437 using x_value_type = typename std::decay<decltype (*x.data()) >::type;
438 using y_value_type = typename std::decay<decltype (*y.data()) >::type;
439 const x_value_type *__restrict x_ptr(x.data());
440 y_value_type *__restrict y_ptr(y.data());
441
442#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
443#pragma unroll
444#endif
445 for (IndexType i = 0; i < span; ++i)
446 y_ptr[i] = x_ptr[i];
447 }
448};
449
450template<class VecType1,
451 class BlkType,
452 class VecType2,
453 class CoeffType,
454 class IndexType = int,
455 bool is_contiguous = false,
456 class BlkLayoutType = typename BlkType::array_layout>
457struct GEMV {
458 static KOKKOS_INLINE_FUNCTION void
459 run (const CoeffType& alpha,
460 const BlkType& A,
461 const VecType1& x,
462 const VecType2& y)
463 {
464 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
465 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
466 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
467
468 const IndexType numRows = static_cast<IndexType> (A.extent (0));
469 const IndexType numCols = static_cast<IndexType> (A.extent (1));
470
472 for (IndexType i = 0; i < numRows; ++i)
473 for (IndexType j = 0; j < numCols; ++j)
474 y(i) += alpha * A(i,j) * x(j);
475 }
476};
477
478template<class VecType1,
479 class BlkType,
480 class VecType2,
481 class CoeffType,
482 class IndexType>
483struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutLeft> {
484 static KOKKOS_INLINE_FUNCTION void
485 run (const CoeffType& alpha,
486 const BlkType& A,
487 const VecType1& x,
488 const VecType2& y)
489 {
490 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
491 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
492 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
493
494 using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
495 using x_value_type = typename std::decay<decltype (x(0)) >::type;
496 using y_value_type = typename std::decay<decltype (y(0)) >::type;
497
498 const IndexType numRows = static_cast<IndexType> (A.extent (0));
499 const IndexType numCols = static_cast<IndexType> (A.extent (1));
500
501 const A_value_type *__restrict A_ptr(A.data()); const IndexType as1(A.stride(1));
502 const x_value_type *__restrict x_ptr(x.data());
503 y_value_type *__restrict y_ptr(y.data());
504
505#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
506#pragma unroll
507#endif
508 for (IndexType j=0;j<numCols;++j) {
509 const x_value_type x_at_j = alpha*x_ptr[j];
510 const A_value_type *__restrict A_at_j = A_ptr + j*as1;
511#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
512#pragma unroll
513#endif
514 for (IndexType i=0;i<numRows;++i)
515 y_ptr[i] += A_at_j[i] * x_at_j;
516 }
517 }
518};
519
520template<class VecType1,
521 class BlkType,
522 class VecType2,
523 class CoeffType,
524 class IndexType>
525struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutRight> {
526 static KOKKOS_INLINE_FUNCTION void
527 run (const CoeffType& alpha,
528 const BlkType& A,
529 const VecType1& x,
530 const VecType2& y)
531 {
532 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
533 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
534 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
535
536 using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
537 using x_value_type = typename std::decay<decltype (x(0)) >::type;
538 using y_value_type = typename std::decay<decltype (y(0)) >::type;
539
540 const IndexType numRows = static_cast<IndexType> (A.extent (0));
541 const IndexType numCols = static_cast<IndexType> (A.extent (1));
542
543 const A_value_type *__restrict A_ptr(A.data()); const IndexType as0(A.stride(0));
544 const x_value_type *__restrict x_ptr(x.data());
545 y_value_type *__restrict y_ptr(y.data());
546
547#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
548#pragma unroll
549#endif
550 for (IndexType i=0;i<numRows;++i) {
551 y_value_type y_at_i(0);
552 const auto A_at_i = A_ptr + i*as0;
553#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
554#pragma unroll
555#endif
556 for (IndexType j=0;j<numCols;++j)
557 y_at_i += A_at_i[j] * x_ptr[j];
558 y_ptr[i] += alpha*y_at_i;
559 }
560 }
561};
562
563} // namespace Impl
564
567template<class ViewType,
568 class CoefficientType,
569 class IndexType = int,
570 const int rank = ViewType::rank>
571KOKKOS_INLINE_FUNCTION void
572SCAL (const CoefficientType& alpha, const ViewType& x)
573{
574 using LayoutType = typename ViewType::array_layout;
575 constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
576 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
577 Impl::SCAL<ViewType, CoefficientType, IndexType, is_contiguous, rank>::run (alpha, x);
578}
579
581template<class ViewType,
582 class InputType,
583 class IndexType = int,
584 const int rank = ViewType::rank>
585KOKKOS_INLINE_FUNCTION void
586FILL (const ViewType& x, const InputType& val)
587{
588 using LayoutType = typename ViewType::array_layout;
589 constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
590 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
591 Impl::FILL<ViewType, InputType, IndexType, is_contiguous, rank>::run (x, val);
592}
593
599template<class CoefficientType,
600 class ViewType1,
601 class ViewType2,
602 class IndexType = int,
603 const int rank = ViewType1::rank>
604KOKKOS_INLINE_FUNCTION void
605AXPY (const CoefficientType& alpha,
606 const ViewType1& x,
607 const ViewType2& y)
608{
609 static_assert (static_cast<int> (ViewType1::rank) ==
610 static_cast<int> (ViewType2::rank),
611 "AXPY: x and y must have the same rank.");
612 using LayoutType1 = typename ViewType1::array_layout;
613 using LayoutType2 = typename ViewType2::array_layout;
614 constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
615 constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
616 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
617 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
618 Impl::AXPY<CoefficientType, ViewType1, ViewType2, IndexType, is_contiguous, rank>::run (alpha, x, y);
619}
620
629template<class ViewType1,
630 class ViewType2,
631 class IndexType = int,
632 const int rank = ViewType1::rank>
633KOKKOS_INLINE_FUNCTION void
634COPY (const ViewType1& x, const ViewType2& y)
635{
636 static_assert (static_cast<int> (ViewType1::rank) ==
637 static_cast<int> (ViewType2::rank),
638 "COPY: x and y must have the same rank.");
639 using LayoutType1 = typename ViewType1::array_layout;
640 using LayoutType2 = typename ViewType2::array_layout;
641 constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
642 constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
643 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
644 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
645 Impl::COPY<ViewType1, ViewType2, IndexType, is_contiguous, rank>::run (x, y);
646}
647
659template<class VecType1,
660 class BlkType,
661 class VecType2,
662 class CoeffType,
663 class IndexType = int>
664KOKKOS_INLINE_FUNCTION void
665GEMV (const CoeffType& alpha,
666 const BlkType& A,
667 const VecType1& x,
668 const VecType2& y)
669{
670 constexpr bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
671 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
672 constexpr bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
673 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
674 constexpr bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
675 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
676 constexpr bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
677 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run (alpha, A, x, y);
678}
679
687template<class ViewType1,
688 class ViewType2,
689 class ViewType3,
690 class CoefficientType,
691 class IndexType = int>
692KOKKOS_INLINE_FUNCTION void
693GEMM (const char transA[],
694 const char transB[],
695 const CoefficientType& alpha,
696 const ViewType1& A,
697 const ViewType2& B,
698 const CoefficientType& beta,
699 const ViewType3& C)
700{
701 // Assert that A, B, and C are in fact matrices
702 static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
703 static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
704 static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
705
706 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
707 typedef Kokkos::ArithTraits<Scalar> STS;
708 const Scalar ZERO = STS::zero();
709 const Scalar ONE = STS::one();
710
711 // Get the dimensions
712 IndexType m, n, k;
713 if(transA[0] == 'N' || transA[0] == 'n') {
714 m = static_cast<IndexType> (A.extent (0));
715 n = static_cast<IndexType> (A.extent (1));
716 }
717 else {
718 m = static_cast<IndexType> (A.extent (1));
719 n = static_cast<IndexType> (A.extent (0));
720 }
721 k = static_cast<IndexType> (C.extent (1));
722
723 // quick return if possible
724 if(alpha == ZERO && beta == ONE)
725 return;
726
727 // And if alpha equals zero...
728 if(alpha == ZERO) {
729 if(beta == ZERO) {
730 for(IndexType i=0; i<m; i++) {
731 for(IndexType j=0; j<k; j++) {
732 C(i,j) = ZERO;
733 }
734 }
735 }
736 else {
737 for(IndexType i=0; i<m; i++) {
738 for(IndexType j=0; j<k; j++) {
739 C(i,j) = beta*C(i,j);
740 }
741 }
742 }
743 }
744
745 // Start the operations
746 if(transB[0] == 'n' || transB[0] == 'N') {
747 if(transA[0] == 'n' || transA[0] == 'N') {
748 // Form C = alpha*A*B + beta*C
749 for(IndexType j=0; j<n; j++) {
750 if(beta == ZERO) {
751 for(IndexType i=0; i<m; i++) {
752 C(i,j) = ZERO;
753 }
754 }
755 else if(beta != ONE) {
756 for(IndexType i=0; i<m; i++) {
757 C(i,j) = beta*C(i,j);
758 }
759 }
760 for(IndexType l=0; l<k; l++) {
761 Scalar temp = alpha*B(l,j);
762 for(IndexType i=0; i<m; i++) {
763 C(i,j) = C(i,j) + temp*A(i,l);
764 }
765 }
766 }
767 }
768 else {
769 // Form C = alpha*A**T*B + beta*C
770 for(IndexType j=0; j<n; j++) {
771 for(IndexType i=0; i<m; i++) {
772 Scalar temp = ZERO;
773 for(IndexType l=0; l<k; l++) {
774 temp = temp + A(l,i)*B(l,j);
775 }
776 if(beta == ZERO) {
777 C(i,j) = alpha*temp;
778 }
779 else {
780 C(i,j) = alpha*temp + beta*C(i,j);
781 }
782 }
783 }
784 }
785 }
786 else {
787 if(transA[0] == 'n' || transA[0] == 'N') {
788 // Form C = alpha*A*B**T + beta*C
789 for(IndexType j=0; j<n; j++) {
790 if(beta == ZERO) {
791 for(IndexType i=0; i<m; i++) {
792 C(i,j) = ZERO;
793 }
794 }
795 else if(beta != ONE) {
796 for(IndexType i=0; i<m; i++) {
797 C(i,j) = beta*C(i,j);
798 }
799 }
800 for(IndexType l=0; l<k; l++) {
801 Scalar temp = alpha*B(j,l);
802 for(IndexType i=0; i<m; i++) {
803 C(i,j) = C(i,j) + temp*A(i,l);
804 }
805 }
806 }
807 }
808 else {
809 // Form C = alpha*A**T*B**T + beta*C
810 for(IndexType j=0; j<n; j++) {
811 for(IndexType i=0; i<m; i++) {
812 Scalar temp = ZERO;
813 for(IndexType l=0; l<k; l++) {
814 temp = temp + A(l,i)*B(j,l);
815 }
816 if(beta == ZERO) {
817 C(i,j) = alpha*temp;
818 }
819 else {
820 C(i,j) = alpha*temp + beta*C(i,j);
821 }
822 }
823 }
824 }
825 }
826}
827
829template<class LittleBlockType,
830 class LittleVectorType>
831KOKKOS_INLINE_FUNCTION void
832GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
833{
834 // The type of an entry of ipiv is the index type.
835 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
836 static_assert (std::is_integral<IndexType>::value,
837 "GETF2: The type of each entry of ipiv must be an integer type.");
838 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
839 static_assert (! std::is_const<Scalar>::value,
840 "GETF2: A must not be a const View (or LittleBlock).");
841 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
842 "GETF2: ipiv must not be a const View (or LittleBlock).");
843 static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
844 typedef Kokkos::ArithTraits<Scalar> STS;
845 const Scalar ZERO = STS::zero();
846
847 const IndexType numRows = static_cast<IndexType> (A.extent (0));
848 const IndexType numCols = static_cast<IndexType> (A.extent (1));
849 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
850
851 // std::min is not a CUDA device function
852 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
853 if (pivDim < minPivDim) {
854 info = -2;
855 return;
856 }
857
858 // Initialize info
859 info = 0;
860
861 for(IndexType j=0; j < pivDim; j++)
862 {
863 // Find pivot and test for singularity
864 IndexType jp = j;
865 for(IndexType i=j+1; i<numRows; i++)
866 {
867 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
868 jp = i;
869 }
870 }
871 ipiv(j) = jp+1;
872
873 if(A(jp,j) != ZERO)
874 {
875 // Apply the interchange to columns 1:N
876 if(jp != j)
877 {
878 for(IndexType i=0; i < numCols; i++)
879 {
880 Scalar temp = A(jp,i);
881 A(jp,i) = A(j,i);
882 A(j,i) = temp;
883 }
884 }
885
886 // Compute elements J+1:M of J-th column
887 for(IndexType i=j+1; i<numRows; i++) {
888 A(i,j) = A(i,j) / A(j,j);
889 }
890 }
891 else if(info == 0) {
892 info = j;
893 }
894
895 // Update trailing submatrix
896 for(IndexType r=j+1; r < numRows; r++)
897 {
898 for(IndexType c=j+1; c < numCols; c++) {
899 A(r,c) = A(r,c) - A(r,j) * A(j,c);
900 }
901 }
902 }
903}
904
905namespace Impl {
906
910template<class LittleBlockType,
911 class LittleIntVectorType,
912 class LittleScalarVectorType,
913 const int rank = LittleScalarVectorType::rank>
914struct GETRS {
915 static KOKKOS_INLINE_FUNCTION void
916 run (const char mode[],
917 const LittleBlockType& A,
918 const LittleIntVectorType& ipiv,
919 const LittleScalarVectorType& B,
920 int& info);
921};
922
924template<class LittleBlockType,
925 class LittleIntVectorType,
926 class LittleScalarVectorType>
927struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
928 static KOKKOS_INLINE_FUNCTION void
929 run (const char mode[],
930 const LittleBlockType& A,
931 const LittleIntVectorType& ipiv,
932 const LittleScalarVectorType& B,
933 int& info)
934 {
935 // The type of an entry of ipiv is the index type.
936 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
937 // IndexType must be signed, because this code does a countdown loop
938 // to zero. Unsigned integers are always >= 0, even on underflow.
939 static_assert (std::is_integral<IndexType>::value &&
940 std::is_signed<IndexType>::value,
941 "GETRS: The type of each entry of ipiv must be a signed integer.");
942 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
943 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
944 "GETRS: B must not be a const View (or LittleBlock).");
945 static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
946 static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
947 static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
948
949 typedef Kokkos::ArithTraits<Scalar> STS;
950 const Scalar ZERO = STS::zero();
951 const IndexType numRows = static_cast<IndexType> (A.extent (0));
952 const IndexType numCols = static_cast<IndexType> (A.extent (1));
953 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
954
955 info = 0;
956
957 // Ensure that the matrix is square
958 if (numRows != numCols) {
959 info = -2;
960 return;
961 }
962
963 // Ensure that the pivot array is sufficiently large
964 if (pivDim < numRows) {
965 info = -3;
966 return;
967 }
968
969 // No transpose case
970 if(mode[0] == 'n' || mode[0] == 'N') {
971 // Apply row interchanges to the RHS
972 for(IndexType i=0; i<numRows; i++) {
973 if(ipiv(i) != i+1) {
974 Scalar temp = B(i);
975 B(i) = B(ipiv(i)-1);
976 B(ipiv(i)-1) = temp;
977 }
978 }
979
980 // Solve Lx=b, overwriting b with x
981 for(IndexType r=1; r < numRows; r++) {
982 for(IndexType c=0; c < r; c++) {
983 B(r) = B(r) - A(r,c)*B(c);
984 }
985 }
986
987 // Solve Ux=b, overwriting b with x
988 for(IndexType r=numRows-1; r >= 0; r--) {
989 // Check whether U is singular
990 if(A(r,r) == ZERO) {
991 info = r+1;
992 return;
993 }
994
995 for(IndexType c=r+1; c < numCols; c++) {
996 B(r) = B(r) - A(r,c)*B(c);
997 }
998 B(r) = B(r) / A(r,r);
999 }
1000 }
1001 // Transpose case
1002 else if(mode[0] == 't' || mode[0] == 'T') {
1003 info = -1; // NOT YET IMPLEMENTED
1004 return;
1005 }
1006 // Conjugate transpose case
1007 else if (mode[0] == 'c' || mode[0] == 'C') {
1008 info = -1; // NOT YET IMPLEMENTED
1009 return;
1010 }
1011 else { // invalid mode
1012 info = -1;
1013 return;
1014 }
1015 }
1016};
1017
1018
1020template<class LittleBlockType,
1021 class LittleIntVectorType,
1022 class LittleScalarVectorType>
1023struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1024 static KOKKOS_INLINE_FUNCTION void
1025 run (const char mode[],
1026 const LittleBlockType& A,
1027 const LittleIntVectorType& ipiv,
1028 const LittleScalarVectorType& B,
1029 int& info)
1030 {
1031 // The type of an entry of ipiv is the index type.
1032 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1033 static_assert (std::is_integral<IndexType>::value,
1034 "GETRS: The type of each entry of ipiv must be an integer type.");
1035 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1036 "GETRS: B must not be a const View (or LittleBlock).");
1037 static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1038 static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1039 static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1040
1041 // The current implementation iterates over one right-hand side at
1042 // a time. It might be faster to do this differently, but this
1043 // should work for now.
1044 const IndexType numRhs = B.extent (1);
1045 info = 0;
1046
1047 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1048 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1049 GETRS<LittleBlockType, LittleIntVectorType, decltype (B_cur), 1>::run (mode, A, ipiv, B_cur, info);
1050 if (info != 0) {
1051 return;
1052 }
1053 }
1054 }
1055};
1056
1057} // namespace Impl
1058
1062template<class LittleBlockType,
1063 class LittleIntVectorType,
1064 class LittleScalarVectorType>
1065KOKKOS_INLINE_FUNCTION void
1066GETRS (const char mode[],
1067 const LittleBlockType& A,
1068 const LittleIntVectorType& ipiv,
1069 const LittleScalarVectorType& B,
1070 int& info)
1071{
1072 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1073 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1074}
1075
1076
1091template<class LittleBlockType,
1092 class LittleIntVectorType,
1093 class LittleScalarVectorType>
1094KOKKOS_INLINE_FUNCTION void
1095GETRI (const LittleBlockType& A,
1096 const LittleIntVectorType& ipiv,
1097 const LittleScalarVectorType& work,
1098 int& info)
1099{
1100 // The type of an entry of ipiv is the index type.
1101 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1102 // IndexType must be signed, because this code does a countdown loop
1103 // to zero. Unsigned integers are always >= 0, even on underflow.
1104 static_assert (std::is_integral<IndexType>::value &&
1105 std::is_signed<IndexType>::value,
1106 "GETRI: The type of each entry of ipiv must be a signed integer.");
1107 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1108 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1109 "GETRI: A must not be a const View (or LittleBlock).");
1110 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1111 "GETRI: work must not be a const View (or LittleBlock).");
1112 static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1113 typedef Kokkos::ArithTraits<Scalar> STS;
1114 const Scalar ZERO = STS::zero();
1115 const Scalar ONE = STS::one();
1116
1117 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1118 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1119 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1120 const IndexType workDim = static_cast<IndexType> (work.extent (0));
1121
1122 info = 0;
1123
1124 // Ensure that the matrix is square
1125 if (numRows != numCols) {
1126 info = -1;
1127 return;
1128 }
1129
1130 // Ensure that the pivot array is sufficiently large
1131 if (pivDim < numRows) {
1132 info = -2;
1133 return;
1134 }
1135
1136 // Ensure that the work array is sufficiently large
1137 if (workDim < numRows) {
1138 info = -3;
1139 return;
1140 }
1141
1142 // Form Uinv in place
1143 for(IndexType j=0; j < numRows; j++) {
1144 if(A(j,j) == ZERO) {
1145 info = j+1;
1146 return;
1147 }
1148
1149 A(j,j) = ONE / A(j,j);
1150
1151 // Compute elements 1:j-1 of j-th column
1152 for(IndexType r=0; r < j; r++) {
1153 A(r,j) = A(r,r)*A(r,j);
1154 for(IndexType c=r+1; c < j; c++) {
1155 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1156 }
1157 }
1158 for(IndexType r=0; r < j; r++) {
1159 A(r,j) = -A(j,j)*A(r,j);
1160 }
1161 }
1162
1163 // Compute Ainv by solving A\L = Uinv
1164 for(IndexType j = numCols-2; j >= 0; j--) {
1165 // Copy lower triangular data to work array and replace with 0
1166 for(IndexType r=j+1; r < numRows; r++) {
1167 work(r) = A(r,j);
1168 A(r,j) = 0;
1169 }
1170
1171 for(IndexType r=0; r < numRows; r++) {
1172 for(IndexType i=j+1; i < numRows; i++) {
1173 A(r,j) = A(r,j) - work(i)*A(r,i);
1174 }
1175 }
1176 }
1177
1178 // Apply column interchanges
1179 for(IndexType j=numRows-1; j >= 0; j--) {
1180 IndexType jp = ipiv(j)-1;
1181 if(j != jp) {
1182 for(IndexType r=0; r < numRows; r++) {
1183 Scalar temp = A(r,j);
1184 A(r,j) = A(r,jp);
1185 A(r,jp) = temp;
1186 }
1187 }
1188 }
1189}
1190
1191
1192// mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1193// an implementation for trans != 'N' (the transpose and conjugate
1194// transpose cases).
1195#if 0
1196template<class LittleBlockType,
1197 class LittleVectorTypeX,
1198 class LittleVectorTypeY,
1199 class CoefficientType,
1200 class IndexType = int>
1201KOKKOS_INLINE_FUNCTION void
1202GEMV (const char trans,
1203 const CoefficientType& alpha,
1204 const LittleBlockType& A,
1205 const LittleVectorTypeX& x,
1206 const CoefficientType& beta,
1207 const LittleVectorTypeY& y)
1208{
1209 // y(0) returns a reference to the 0-th entry of y. Remove that
1210 // reference to get the type of each entry of y. It's OK if y has
1211 // zero entries -- this doesn't actually do y(i), it just returns
1212 // the type of that expression.
1213 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1214 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1215 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1216
1217 if (beta == 0.0) {
1218 if (alpha == 0.0) {
1219 for (IndexType i = 0; i < numRows; ++i) {
1220 y(i) = 0.0;
1221 }
1222 }
1223 else {
1224 for (IndexType i = 0; i < numRows; ++i) {
1225 y_value_type y_i = 0.0;
1226 for (IndexType j = 0; j < numCols; ++j) {
1227 y_i += A(i,j) * x(j);
1228 }
1229 y(i) = y_i;
1230 }
1231 }
1232 }
1233 else { // beta != 0
1234 if (alpha == 0.0) {
1235 if (beta == 0.0) {
1236 for (IndexType i = 0; i < numRows; ++i) {
1237 y(i) = 0.0;
1238 }
1239 }
1240 else {
1241 for (IndexType i = 0; i < numRows; ++i) {
1242 y(i) *= beta;
1243 }
1244 }
1245 }
1246 else {
1247 for (IndexType i = 0; i < numRows; ++i) {
1248 y_value_type y_i = beta * y(i);
1249 for (IndexType j = 0; j < numCols; ++j) {
1250 y_i += alpha * A(i,j) * x(j);
1251 }
1252 y(i) = y_i;
1253 }
1254 }
1255 }
1256}
1257
1258#endif // 0
1259
1260} // namespace Tpetra
1261
1262#endif // TPETRA_BLOCKVIEW_HPP
Namespace for new Tpetra features that are not ready for public release, but are ready for evaluation...
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C.
@ ZERO
Replace old values with zero.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices).
Implementation of Tpetra::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices).
Implementation of Tpetra::COPY function.
Implementation of Tpetra::FILL function.
Computes the solution to Ax=b.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix).
Implementation of Tpetra::SCAL function.