10#ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
11#define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
13#include "TpetraCore_config.h"
14#include "Teuchos_Array.hpp"
15#include "Teuchos_ArrayView.hpp"
59namespace PackCrsMatrixImpl {
// Functor whose operator() takes (index, running value, final flag); it is the
// functor handed to Kokkos::parallel_scan by computeNumPacketsAndOffsets.
// For each index it stores the running value into outputOffsets_ and, for
// indices that name a row to pack, the row's packed byte count into counts_.
// An integer error flag lives in the device View error_.
67template<
class OutputOffsetsViewType,
69 class InputOffsetsViewType,
70 class InputLocalRowIndicesViewType,
71 class InputLocalRowPidsViewType,
73#ifdef HAVE_TPETRA_DEBUG
79class NumPacketsAndOffsetsFunctor {
// Element types of the views this functor reads and writes.
81 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
82 typedef typename CountsViewType::non_const_value_type count_type;
83 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
84 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
85 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The output offsets View determines the device type used by the functor.
87 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time requirements on the View template arguments.
88 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
89 typename device_type::execution_space>::value,
90 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
91 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
92 "OutputOffsetsViewType must be a Kokkos::View.");
93 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
94 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
95 static_assert (std::is_integral<output_offset_type>::value,
96 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
97 static_assert (Kokkos::is_view<CountsViewType>::value,
98 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this compares CountsViewType::value_type against
// output_offset_type, whereas the message and the parallel check on
// OutputOffsetsViewType suggest count_type was intended -- confirm.
99 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
100 "CountsViewType must be a nonconst Kokkos::View.");
101 static_assert (std::is_integral<count_type>::value,
102 "The type of each entry of CountsViewType must be a built-in integer type.");
103 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
104 "InputOffsetsViewType must be a Kokkos::View.");
105 static_assert (std::is_integral<input_offset_type>::value,
106 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
107 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
108 "InputLocalRowIndicesViewType must be a Kokkos::View.");
109 static_assert (std::is_integral<local_row_index_type>::value,
110 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor: stores the views and the per-item byte sizes, then verifies
// the view extents against the number of rows to pack.
112 NumPacketsAndOffsetsFunctor (
const OutputOffsetsViewType& outputOffsets,
113 const CountsViewType& counts,
114 const InputOffsetsViewType& rowOffsets,
115 const InputLocalRowIndicesViewType& lclRowInds,
116 const InputLocalRowPidsViewType& lclRowPids,
117 const count_type sizeOfLclCount,
118 const count_type sizeOfGblColInd,
119 const count_type sizeOfPid,
120 const count_type sizeOfValue) :
121 outputOffsets_ (outputOffsets),
123 rowOffsets_ (rowOffsets),
124 lclRowInds_ (lclRowInds),
125 lclRowPids_ (lclRowPids),
126 sizeOfLclCount_ (sizeOfLclCount),
127 sizeOfGblColInd_ (sizeOfGblColInd),
128 sizeOfPid_ (sizeOfPid),
129 sizeOfValue_ (sizeOfValue),
// The number of rows to pack is the length of the local row index list.
133 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
// counts must have exactly one entry per row to pack; otherwise
// std::invalid_argument is raised with the two extents in the message.
135 if (numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
136 std::ostringstream os;
137 os <<
"lclRowInds.extent(0) = " << numRowsToPack
138 <<
" != counts.extent(0) = " << counts_.extent (0)
140 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// outputOffsets must have one more entry than there are rows to pack;
// otherwise std::invalid_argument is raised.
142 if (
static_cast<size_t> (numRowsToPack + 1) !=
143 static_cast<size_t> (outputOffsets_.extent (0))) {
144 std::ostringstream os;
145 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
146 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
148 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body. curInd is the scan index, update the running value carried by
// the scan, and final the flag passed by the scan.
153 KOKKOS_INLINE_FUNCTION
void
154 operator() (
const local_row_index_type& curInd,
155 output_offset_type& update,
156 const bool final)
const
// Range checks on curInd (negative, or at/after the end of outputOffsets_).
// What the branches do is not visible in this listing.
159 if (curInd <
static_cast<local_row_index_type
> (0)) {
167 if (curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
// Store the running value as the offset for this index.
172 outputOffsets_(curInd) = update;
// Only indices below counts_.extent(0) name a row to pack.
175 if (curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
176 const auto lclRow = lclRowInds_(curInd);
// Guard the rowOffsets_ accesses below: lclRow + 1 must be a valid index into
// rowOffsets_, and lclRow must not be negative.
177 if (
static_cast<size_t> (lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
178 static_cast<local_row_index_type
> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row: difference of consecutive row offsets.
186 const count_type count =
187 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// A row with no entries packs to zero bytes. Otherwise the size is
// sizeOfLclCount_ plus count times a per-entry size that includes
// sizeOfGblColInd_ and, when lclRowPids_ is nonempty, sizeOfPid_ (the rest of
// the per-entry expression is not visible in this listing).
193 const count_type numBytes = (count == 0) ?
194 static_cast<count_type
> (0) :
195 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
196 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
200 counts_(curInd) = numBytes;
// Bring the error flag to the host: make a host mirror of error_ and copy
// the device value into it.
212 auto error_h = Kokkos::create_mirror_view (error_);
217 Kokkos::deep_copy (error_h, error_);
// Data members: the two output views, the const input views, the per-item
// byte sizes, and the integer error flag stored in a device View.
222 OutputOffsetsViewType outputOffsets_;
223 CountsViewType counts_;
224 typename InputOffsetsViewType::const_type rowOffsets_;
225 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
226 typename InputLocalRowPidsViewType::const_type lclRowPids_;
227 count_type sizeOfLclCount_;
228 count_type sizeOfGblColInd_;
229 count_type sizeOfPid_;
230 count_type sizeOfValue_;
231 Kokkos::View<int, device_type> error_;
// Validates the view extents, runs NumPacketsAndOffsetsFunctor in a
// Kokkos::parallel_scan over [0, numRowsToPack + 1), and returns an entry of
// outputOffsets read back with getEntryOnHost, cast to count_type.
// Raises std::invalid_argument for inconsistent extents and
// std::runtime_error when the functor reports a nonzero error code.
243template<
class OutputOffsetsViewType,
244 class CountsViewType,
245 class InputOffsetsViewType,
246 class InputLocalRowIndicesViewType,
247 class InputLocalRowPidsViewType>
248typename CountsViewType::non_const_value_type
250 const CountsViewType& counts,
251 const InputOffsetsViewType& rowOffsets,
252 const InputLocalRowIndicesViewType& lclRowInds,
253 const InputLocalRowPidsViewType& lclRowPids,
254 const typename CountsViewType::non_const_value_type sizeOfLclCount,
255 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
256 const typename CountsViewType::non_const_value_type sizeOfPid,
257 const typename CountsViewType::non_const_value_type sizeOfValue)
260 CountsViewType,
typename InputOffsetsViewType::const_type,
261 typename InputLocalRowIndicesViewType::const_type,
262 typename InputLocalRowPidsViewType::const_type> functor_type;
// Local aliases; the scan policy is indexed by the functor's local row index
// type (LO) on the output offsets View's execution space.
263 typedef typename CountsViewType::non_const_value_type count_type;
264 typedef typename OutputOffsetsViewType::size_type size_type;
265 typedef typename OutputOffsetsViewType::execution_space
execution_space;
266 typedef typename functor_type::local_row_index_type LO;
267 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
// Prefix prepended to every exception message raised here.
268 const char prefix[] =
"computeNumPacketsAndOffsets: ";
270 count_type count = 0;
271 const count_type numRowsToPack = lclRowInds.extent (0);
// Special case for no rows to pack (its body is not visible in this listing).
273 if (numRowsToPack == 0) {
// There are rows to pack, so rowOffsets must describe at least one row.
277 TEUCHOS_TEST_FOR_EXCEPTION
278 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
279 std::invalid_argument, prefix <<
"There is at least one row to pack, "
280 "but the matrix has no rows. lclRowInds.extent(0) = " <<
281 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
282 rowOffsets.extent (0) <<
" <= 1.");
// outputOffsets needs one more entry than the number of rows to pack.
283 TEUCHOS_TEST_FOR_EXCEPTION
284 (outputOffsets.extent (0) !=
285 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
286 prefix <<
"Output dimension does not match number of rows to pack. "
287 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
288 <<
" != lclRowInds.extent(0) + 1 = "
289 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
// counts needs exactly one entry per row to pack.
290 TEUCHOS_TEST_FOR_EXCEPTION
291 (counts.extent (0) != numRowsToPack, std::invalid_argument,
292 prefix <<
"counts.extent(0) = " << counts.extent (0)
293 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Build the functor and scan one index past the last row, matching the
// numRowsToPack + 1 entries of outputOffsets.
295 functor_type f (outputOffsets, counts, rowOffsets,
296 lclRowInds, lclRowPids, sizeOfLclCount,
297 sizeOfGblColInd, sizeOfPid, sizeOfValue);
298 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// A nonzero error flag from the functor becomes a std::runtime_error.
301 const int errCode = f.getError ();
302 TEUCHOS_TEST_FOR_EXCEPTION
303 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
304 << errCode <<
" != 0.");
// Consistency check: the last entry of outputOffsets is compared with `total`
// and a diagnostic is written to errStr on mismatch. How `total` is
// accumulated, and what encloses this check, is not visible in this listing.
308 for (LO k = 0; k < numRowsToPack; ++k) {
311 if (outputOffsets(numRowsToPack) != total) {
// Create the message stream lazily, on the first reported problem.
312 if (errStr.get () == NULL) {
313 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
315 std::ostringstream& os = *errStr;
317 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
318 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
319 << total <<
"." << std::endl;
320 if (numRowsToPack != 0) {
// With fewer than ten rows, the arrays are printed in full.
322 if (numRowsToPack <
static_cast<LO
> (10)) {
323 os <<
"outputOffsets: [";
324 for (LO i = 0; i <= numRowsToPack; ++i) {
325 os << outputOffsets(i);
326 if (
static_cast<LO
> (i + 1) <= numRowsToPack) {
330 os <<
"]" << std::endl;
332 for (LO i = 0; i < numRowsToPack; ++i) {
334 if (
static_cast<LO
> (i + 1) < numRowsToPack) {
338 os <<
"]" << std::endl;
// Otherwise only the next-to-last offset is reported.
341 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
342 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
345 count = outputOffsets(numRowsToPack);
346 return {
false, errStr};
// Read the result from outputOffsets via getEntryOnHost (the index argument
// is not visible in this listing).
352 using Tpetra::Details::getEntryOnHost;
353 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Packs one matrix row into the byte buffer `exports`, starting at `offset`.
// The row's sections are laid out back to back: entry count, global column
// indices, optional process IDs (only when pack_pids is true), then values.
// Returns a pair (error code, bytes written): the code is 0 on success, 10 if
// any packing step reported an error, and 11 if the byte total differs from
// the expected section lengths.
373template<
class ST,
class ColumnMap,
class BufferDeviceType>
375Kokkos::pair<int, size_t>
377 const Kokkos::View<char*, BufferDeviceType>& exports,
382 const size_t num_ent,
383 const size_t num_bytes_per_value,
384 const bool pack_pids)
386 using Kokkos::subview;
387 using LO =
typename ColumnMap::local_ordinal_type;
388 using GO =
typename ColumnMap::global_ordinal_type;
389 using return_type = Kokkos::pair<int, size_t>;
// Early exit reporting no error and zero bytes (the guarding condition is not
// visible in this listing).
393 return return_type (0, 0);
// Start position and length of each section within exports.
396 const LO num_ent_LO =
static_cast<LO
> (num_ent);
397 const size_t num_ent_beg = offset;
400 const size_t gids_beg = num_ent_beg + num_ent_len;
403 const size_t pids_beg = gids_beg + gids_len;
// The process-ID section has zero length when pack_pids is false.
404 const size_t pids_len = pack_pids ?
406 static_cast<size_t> (0);
408 const size_t vals_beg = gids_beg + gids_len + pids_len;
409 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw output pointers for each section; pids_out is NULL when process IDs are
// not packed.
411 char*
const num_ent_out = exports.data () + num_ent_beg;
412 char*
const gids_out = exports.data () + gids_beg;
413 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
414 char*
const vals_out = exports.data () + vals_beg;
// Running total of bytes written by the packing steps.
416 size_t num_bytes_out = 0;
// Column indices: each local index is converted to a global index through
// the column map.
423 for (
size_t k = 0; k < num_ent; ++k) {
424 const LO lid = lids_in[k];
425 const GO gid = col_map.getGlobalElement (lid);
// Process IDs: pids_in is indexed by the local column index, not by the
// position k within the row.
430 for (
size_t k = 0; k < num_ent; ++k) {
431 const LO lid = lids_in[k];
432 const int pid = pids_in[lid];
// Accumulate the (error code, byte count) pair returned by a packing step.
438 error_code += p.first;
439 num_bytes_out += p.second;
// Any accumulated error is reported as code 10.
442 if (error_code != 0) {
443 return return_type (10, num_bytes_out);
// The bytes written must equal the sum of the section lengths; otherwise
// code 11 is returned.
446 const size_t expected_num_bytes =
447 num_ent_len + gids_len + pids_len + vals_len;
448 if (num_bytes_out != expected_num_bytes) {
449 return return_type (11, num_bytes_out);
451 return return_type (0, num_bytes_out);
// Reduction functor that packs one row per index via packCrsMatrixRow.
// The reduction value is a pair (error code, index), initialized by init() to
// (0, invalid LO).
// Error codes assigned in operator():
//   1 - export_lid is not less than the local matrix's number of rows
//   2 - offset, or offset + num_bytes, lies beyond the export buffer
//   3 - bytes packed for the row differ from num_packets_per_lid(i)
//   any nonzero code returned by packCrsMatrixRow is passed through.
454template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
455struct PackCrsMatrixFunctor {
458 typedef typename local_matrix_device_type::value_type ST;
461 typedef typename local_matrix_device_type::device_type DT;
// View types for the per-row byte counts, row offsets, and output buffer.
463 typedef Kokkos::View<const size_t*, BufferDeviceType>
464 num_packets_per_lid_view_type;
465 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
466 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
470 typedef typename num_packets_per_lid_view_type::non_const_value_type
472 typedef typename offsets_view_type::non_const_value_type
// Reduction result: (error code, index at which it was recorded).
474 typedef Kokkos::pair<int, LO> value_type;
476 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
477 "local_map_type::local_ordinal_type and "
478 "local_matrix_device_type::ordinal_type must be the same.");
// State captured for the kernel.
481 local_map_type local_col_map;
482 exports_view_type exports;
483 num_packets_per_lid_view_type num_packets_per_lid;
484 export_lids_view_type export_lids;
485 source_pids_view_type source_pids;
486 offsets_view_type offsets;
487 size_t num_bytes_per_value;
// Constructor: copies every argument into the matching member, then checks
// the row map length against the number of rows.
491 const local_map_type& local_col_map_in,
492 const exports_view_type& exports_in,
493 const num_packets_per_lid_view_type& num_packets_per_lid_in,
494 const export_lids_view_type& export_lids_in,
495 const source_pids_view_type& source_pids_in,
496 const offsets_view_type& offsets_in,
497 const size_t num_bytes_per_value_in,
498 const bool pack_pids_in) :
499 local_matrix (local_matrix_in),
500 local_col_map (local_col_map_in),
501 exports (exports_in),
502 num_packets_per_lid (num_packets_per_lid_in),
503 export_lids (export_lids_in),
504 source_pids (source_pids_in),
505 offsets (offsets_in),
506 num_bytes_per_value (num_bytes_per_value_in),
507 pack_pids (pack_pids_in)
// When the matrix has rows, the row map must have numRows + 1 entries;
// otherwise std::logic_error is raised.
509 const LO numRows = local_matrix_in.numRows ();
511 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
512 TEUCHOS_TEST_FOR_EXCEPTION
513 (numRows != 0 && rowMapDim != numRows +
static_cast<LO
> (1),
514 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
515 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid local ordinal.
518 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
520 using ::Tpetra::Details::OrdinalTraits;
521 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Reduction join: acts when src carries an error and dst does not yet
// (the branch body is not visible in this listing).
524 KOKKOS_INLINE_FUNCTION
void
525 join (value_type& dst,
const value_type& src)
const
529 if (src.first != 0 && dst.first == 0) {
// Per-index body: pack the row named by export_lids[i] into exports at
// offsets[i].
534 KOKKOS_INLINE_FUNCTION
535 void operator() (
const LO i, value_type& dst)
const
537 const size_t offset = offsets[i];
538 const LO export_lid = export_lids[i];
539 const size_t buf_size = exports.size();
540 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): in the visible code row_map[export_lid+1] is read here,
// before export_lid is range-checked below -- confirm this cannot read out
// of bounds.
541 const size_t num_ent =
542 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
543 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): each error assignment below is guarded by
// `dst.first != 0`. init() sets dst.first to 0 and join() tests
// `dst.first == 0`, so as written an error is stored only when one is
// already present. Confirm whether `== 0` was intended.
553 if (export_lid >= local_matrix.numRows ()) {
554 if (dst.first != 0) {
555 dst = Kokkos::make_pair (1, i);
// The row's byte range must fit inside the export buffer.
559 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
560 if (dst.first != 0) {
561 dst = Kokkos::make_pair (2, i);
// Take subviews of the row's values and local column indices and pack them.
571 const auto row_beg = local_matrix.graph.row_map[export_lid];
572 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
573 auto vals_in = subview (local_matrix.values,
574 Kokkos::make_pair (row_beg, row_end));
575 auto lids_in = subview (local_matrix.graph.entries,
576 Kokkos::make_pair (row_beg, row_end));
577 typedef local_map_type LMT;
578 typedef BufferDeviceType BDT;
579 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
580 source_pids, vals_in, offset,
581 num_ent, num_bytes_per_value,
// p is (error code, bytes packed) for this row.
583 int error_code_this_row = p.first;
584 size_t num_bytes_packed_this_row = p.second;
585 if (error_code_this_row != 0) {
586 if (dst.first != 0) {
587 dst = Kokkos::make_pair (error_code_this_row, i);
// The packed size must match the size announced in num_packets_per_lid.
590 else if (num_bytes_packed_this_row != num_bytes) {
591 if (dst.first != 0) {
592 dst = Kokkos::make_pair (3, i);
// Validates the argument extents, then runs PackCrsMatrixFunctor in a
// Kokkos::parallel_reduce over one index per entry of num_packets_per_lid.
// Raises std::invalid_argument for inconsistent arguments and
// std::runtime_error when the functor reports a nonzero error code.
605template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
609 const Kokkos::View<char*, BufferDeviceType>& exports,
613 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
614 const size_t num_bytes_per_value,
615 const bool pack_pids)
618 using DT =
typename LocalMatrix::device_type;
619 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
// Prefix prepended to every exception message raised here.
620 const char prefix[] =
"Tpetra::Details::do_pack: ";
// With rows to pack, offsets must have one more entry than export_lids.
622 if (export_lids.extent (0) != 0) {
623 TEUCHOS_TEST_FOR_EXCEPTION
624 (
static_cast<size_t> (offsets.extent (0)) !=
625 static_cast<size_t> (export_lids.extent (0) + 1),
626 std::invalid_argument, prefix <<
"offsets.extent(0) = "
627 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
628 << export_lids.extent (0) <<
") + 1.");
// export_lids and num_packets_per_lid must have the same length.
629 TEUCHOS_TEST_FOR_EXCEPTION
630 (export_lids.extent (0) != num_packets_per_lid.extent (0),
631 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
632 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
633 << num_packets_per_lid.extent (0) <<
".");
// Packing process IDs into a nonempty buffer requires a nonempty source_pids.
637 TEUCHOS_TEST_FOR_EXCEPTION
638 (pack_pids && exports.extent (0) != 0 &&
639 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
640 "pack_pids is true, and exports.extent(0) = " <<
641 exports.extent (0) <<
" != 0, meaning that we need to pack at "
642 "least one matrix entry, but source_pids.extent(0) = 0.");
// Build the packing functor from the validated arguments.
645 using pack_functor_type =
646 PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
647 pack_functor_type f (local_matrix, local_map, exports,
648 num_packets_per_lid, export_lids,
649 source_pids, offsets, num_bytes_per_value,
// Reduce over all rows; result is the functor's (error code, index) pair.
652 typename pack_functor_type::value_type result;
653 range_type range (0, num_packets_per_lid.extent (0));
654 Kokkos::parallel_reduce (range, f, result);
// Report a nonzero error code together with the index recorded with it.
656 if (result.first != 0) {
659 TEUCHOS_TEST_FOR_EXCEPTION
660 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor "
661 "reported error code " << result.first <<
" for the first "
662 "bad row " << result.second <<
".");
// Implementation-level pack routine operating on Kokkos Views.
// Visible steps: set constant_num_packets to 0, validate the argument
// extents, determine a bytes-per-value figure and combine it across the
// matrix's communicator, size the exports DualView, and pack on device.
// Raises std::invalid_argument for inconsistent arguments.
695template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
698 Kokkos::DualView<char*, BufferDeviceType>& exports,
699 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
700 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
701 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
702 size_t& constant_num_packets,
703 const bool pack_pids)
706 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
710 typedef BufferDeviceType DT;
711 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
// Prefix prepended to every exception message raised here.
712 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch, fixed to false here.
713 constexpr bool debug =
false;
// Local view of the matrix's column Map.
716 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
721 constant_num_packets = 0;
// export_lids and num_packets_per_lid must have the same length.
723 const size_t num_export_lids =
724 static_cast<size_t> (export_lids.extent (0));
725 TEUCHOS_TEST_FOR_EXCEPTION
727 static_cast<size_t> (num_packets_per_lid.extent (0)),
728 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
729 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
730 << num_packets_per_lid.extent (0) <<
".");
// With rows to export, num_packets_per_lid must point at real storage.
731 if (num_export_lids != 0) {
732 TEUCHOS_TEST_FOR_EXCEPTION
733 (num_packets_per_lid.data () == NULL, std::invalid_argument,
734 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
735 "num_packets_per_lid.data() = "
736 << num_packets_per_lid.data () <<
" == NULL.");
// Bytes per value: a process-local figure is taken from the first stored
// value when the matrix has any, then combined over sourceMatrix's
// communicator with reduceAll (the reduction operator is not visible in this
// listing).
743 size_t num_bytes_per_value = 0;
758 size_t num_bytes_per_value_l = 0;
759 if (local_matrix.values.extent(0) > 0) {
760 const ST& val = local_matrix.values(0);
763 using Teuchos::reduceAll;
764 reduceAll<int, size_t> (* (sourceMatrix.
getComm ()),
766 num_bytes_per_value_l,
767 Teuchos::outArg (num_bytes_per_value));
// Nothing to export: exports becomes an empty DualView.
770 if (num_export_lids == 0) {
771 exports = exports_view_type (
"exports", 0);
// One offset per exported row, plus one trailing entry.
776 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
782 local_matrix.graph.row_map, export_lids,
784 num_bytes_per_lid, num_bytes_per_gid,
785 num_bytes_per_pid, num_bytes_per_value);
// Reallocate exports only when the required byte count exceeds its current
// extent.
788 if (count >
static_cast<size_t> (exports.extent (0))) {
789 exports = exports_view_type (
"exports", count);
// Diagnostic output written to std::cerr (the enclosing condition is not
// visible in this listing).
791 std::ostringstream os;
792 os <<
"*** exports resized to " << count << std::endl;
793 std::cerr << os.str ();
797 std::ostringstream os;
798 os <<
"*** count: " << count <<
", exports.extent(0): "
799 << exports.extent (0) << std::endl;
800 std::cerr << os.str ();
// Packing process IDs into a nonempty buffer requires a nonempty export_pids.
806 TEUCHOS_TEST_FOR_EXCEPTION
807 (pack_pids && exports.extent (0) != 0 &&
808 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
809 "pack_pids is true, and exports.extent(0) = " <<
810 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
811 "one matrix entry, but export_pids.extent(0) = 0.");
813 typedef typename std::decay<
decltype (local_matrix)>::type
815 typedef typename std::decay<
decltype (local_col_map)>::type
// Mark the device side of exports as modified and pack into its device view.
818 exports.modify_device ();
819 auto exports_d = exports.view_device ();
821 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
822 export_lids, export_pids, offsets, num_bytes_per_value,
// Pack routine for the Teuchos::Array / Teuchos::ArrayView interface.
// Wraps the host arrays in Views, packs with pack_pids fixed to false, then
// copies the per-row packet counts and the packed bytes back into the
// caller's arrays, resizing `exports` when its size differs.
829template<
typename ST,
typename LO,
typename GO,
typename NT>
832 Teuchos::Array<char>& exports,
833 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
834 const Teuchos::ArrayView<const LO>& exportLIDs,
835 size_t& constantNumPackets)
// Execution spaces and the host device type used for the copies below.
840 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
841 using device_exec_space =
typename device_type::execution_space;
842 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
// View for the output counts, built from the raw host array with the boolean
// argument false (the call creating it is not visible in this listing).
848 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
850 numPacketsPerLID.getRawPtr (),
851 numPacketsPerLID.size (),
false,
852 "num_packets_per_lid");
// View for the input row indices, built with the boolean argument true.
859 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
861 exportLIDs.getRawPtr (),
862 exportLIDs.size (),
true,
// No process IDs are packed through this interface: export_pids_d is left
// default-constructed and pack_pids is false.
865 Kokkos::View<int*, device_type> export_pids_d;
866 Kokkos::DualView<char*, buffer_device_type> exports_dv;
867 constexpr bool pack_pids =
false;
869 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
870 export_pids_d, constantNumPackets, pack_pids);
// Copy the packet counts back into the caller's host array through an
// unmanaged host View over its storage.
// NOTE(review): this deep_copy overload takes an execution-space instance and
// may return before the copy has completed; no fence is visible in this
// listing -- confirm the host data is ready before it is used.
874 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
875 (numPacketsPerLID.getRawPtr (),
876 numPacketsPerLID.size ());
878 Kokkos::deep_copy (device_exec_space(), num_packets_per_lid_h, num_packets_per_lid_d);
// Resize the caller's array when its size differs from the packed buffer's
// extent, then copy the packed bytes into it.
885 if (
static_cast<size_t> (exports.size ()) !=
886 static_cast<size_t> (exports_dv.extent (0))) {
887 exports.resize (exports_dv.extent (0));
889 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
892 Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.view_device());
// Pack routine for the DualView-based interface. It passes the device views
// of numPacketsPerLID and exportLIDs on with an empty process-ID View and
// pack_pids fixed to false.
895template<
typename ST,
typename LO,
typename GO,
typename NT>
902 size_t& constantNumPackets)
// No process IDs are packed: a zero-length View and pack_pids = false.
908 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
909 constexpr bool pack_pids =
false;
// Work on a copy of the DualView handle to reset its sync state and mark the
// device side as modified.
// NOTE(review): the device view is taken from numPacketsPerLID, not from
// numPacketsPerLID_nc -- presumably both refer to the same data; confirm.
912 auto numPacketsPerLID_nc = numPacketsPerLID;
913 numPacketsPerLID_nc.clear_sync_state ();
914 numPacketsPerLID_nc.modify_device ();
915 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
// The row indices must already be up to date on device.
918 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
919 auto exportLIDs_d = exportLIDs.view_device ();
922 "Tpetra::Details::packCrsMatrixNew",
926 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
927 exportPIDs_d, constantNumPackets, pack_pids);
// Pack routine that also packs process IDs (pack_pids is fixed to true).
// It wraps the host arrays in Views, calls the implementation, and copies
// the packet counts back to the caller's array. Progress and caught
// exceptions are written to std::cerr with a "Proc <rank>: ..." prefix
// (the conditions enabling this output are not visible in this listing).
930template<
typename ST,
typename LO,
typename GO,
typename NT>
934 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
935 const Teuchos::ArrayView<const LO>& exportLIDs,
936 const Teuchos::ArrayView<const int>& sourcePIDs,
937 size_t& constantNumPackets)
// Host device type used for the copy back to the caller's array.
941 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
942 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
944 typename local_matrix_device_type::device_type outputDevice;
// Message prefix, allocated only when it is actually built below.
949 std::unique_ptr<std::string> prefix;
// Determine this process's rank from the matrix's Map and its communicator;
// null Map and null communicator are handled separately (the values returned
// in those cases are not visible in this listing).
951 const int myRank = [&] () {
952 auto map = sourceMatrix.
getMap ();
953 if (map.get () ==
nullptr) {
956 auto comm = map->getComm ();
957 if (comm.get () ==
nullptr) {
960 return comm->getRank ();
// Build the prefix and announce the start of the routine.
962 std::ostringstream os;
963 os <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs: ";
964 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
966 std::ostringstream os2;
967 os2 << *prefix <<
"start" << std::endl;
968 std::cerr << os2.str ();
// View for the output counts, built with the boolean argument false.
975 auto num_packets_per_lid_d =
977 numPacketsPerLID.getRawPtr (),
978 numPacketsPerLID.size (),
false,
979 "num_packets_per_lid");
// Views for the input row indices and process IDs, built with the boolean
// argument true.
985 exportLIDs.getRawPtr (),
986 exportLIDs.size (),
true,
992 sourcePIDs.getRawPtr (),
993 sourcePIDs.size (),
true,
// This interface always packs process IDs.
995 constexpr bool pack_pids =
true;
998 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
999 export_pids_d, constantNumPackets, pack_pids);
// Log exceptions raised by the pack call (whether they are rethrown is not
// visible in this listing).
1001 catch (std::exception& e) {
1003 std::ostringstream os;
1004 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1005 << e.what () << std::endl;
1006 std::cerr << os.str ();
1012 std::ostringstream os;
1013 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1014 "not a subclass of std::exception" << std::endl;
1015 std::cerr << os.str ();
// Copy the packet counts back to the caller's array, only when it is
// nonempty, through an unmanaged host View over its storage.
1020 if (numPacketsPerLID.size () != 0) {
1024 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1025 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1027 Kokkos::deep_copy (
execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
// Log exceptions raised by the copy.
1029 catch (std::exception& e) {
1031 std::ostringstream os;
1032 os << *prefix <<
"Kokkos::deep_copy threw: " << e.what () << std::endl;
1033 std::cerr << os.str ();
1039 std::ostringstream os;
1040 os << *prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1041 "of std::exception" << std::endl;
1042 std::cerr << os.str ();
// Announce completion.
1049 std::ostringstream os;
1050 os << *prefix <<
"done" << std::endl;
1051 std::cerr << os.str ();
// Macro taking (ST, LO, GO, NT) that names Details::packCrsMatrix,
// Details::packCrsMatrixNew and Details::packCrsMatrixWithOwningPIDs with
// those template arguments and their full parameter lists -- presumably
// explicit instantiations; the leading lines of each entry are not visible
// in this listing.
1058#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1060 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1061 Teuchos::Array<char>&, \
1062 const Teuchos::ArrayView<size_t>&, \
1063 const Teuchos::ArrayView<const LO>&, \
1066 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1067 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1068 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1069 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1072 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1073 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1074 const Teuchos::ArrayView<size_t>&, \
1075 const Teuchos::ArrayView<const LO>&, \
1076 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
CountsViewType::non_const_value_type computeNumPacketsAndOffsets(const OutputOffsetsViewType &outputOffsets, const CountsViewType &counts, const InputOffsetsViewType &rowOffsets, const InputLocalRowIndicesViewType &lclRowInds, const InputLocalRowPidsViewType &lclRowPids, const typename CountsViewType::non_const_value_type sizeOfLclCount, const typename CountsViewType::non_const_value_type sizeOfGblColInd, const typename CountsViewType::non_const_value_type sizeOfPid, const typename CountsViewType::non_const_value_type sizeOfValue)
Compute the number of packets and offsets for the pack procedure.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, BufferDeviceType > &exports, const Kokkos::View< size_t *, BufferDeviceType > &num_packets_per_lid, const Kokkos::View< const LO *, BufferDeviceType > &export_lids, const Kokkos::View< const int *, typename NT::device_type > &export_pids, size_t &constant_num_packets, const bool pack_pids)
Pack specified entries of the given local sparse matrix for communication.
void do_pack(const LocalMatrix &local_matrix, const LocalMap &local_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< size_t >::input_array_type &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type &export_lids, const typename PackTraits< int >::input_array_type &source_pids, const Kokkos::View< const size_t *, BufferDeviceType > &offsets, const size_t num_bytes_per_value, const bool pack_pids)
Perform the pack operation for the matrix.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
typename Node::device_type device_type
The Kokkos device type.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Nonmember function that computes a residual: R = B - A * X.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static const bool compileTimeSize
Whether the number of bytes required to pack one instance of value_type is fixed at compile time.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.