Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_packCrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
11#define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
12
13#include "TpetraCore_config.h"
14#include "Teuchos_Array.hpp"
15#include "Teuchos_ArrayView.hpp"
23#include <memory>
24#include <string>
25
47
48namespace Tpetra {
49
50//
51// Users must never rely on anything in the Details namespace.
52//
53namespace Details {
54
55namespace PackCrsGraphImpl {
63template<class OutputOffsetsViewType,
64 class CountsViewType,
65 class InputOffsetsViewType,
66 class InputLocalRowIndicesViewType,
67 class InputLocalRowPidsViewType,
68 const bool debug =
69#ifdef HAVE_TPETRA_DEBUG
70 true
71#else
72 false
73#endif // HAVE_TPETRA_DEBUG
74 >
75class NumPacketsAndOffsetsFunctor{
76public:
77 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
78 typedef typename CountsViewType::non_const_value_type count_type;
79 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
80 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
81 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
82 // output Views drive where execution happens.
83 typedef typename OutputOffsetsViewType::device_type device_type;
84 static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
85 typename device_type::execution_space>::value,
86 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
87 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
88 "OutputOffsetsViewType must be a Kokkos::View.");
89 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
90 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
91 static_assert (std::is_integral<output_offset_type>::value,
92 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
93 static_assert (Kokkos::is_view<CountsViewType>::value,
94 "CountsViewType must be a Kokkos::View.");
95 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
96 "CountsViewType must be a nonconst Kokkos::View.");
97 static_assert (std::is_integral<count_type>::value,
98 "The type of each entry of CountsViewType must be a built-in integer type.");
99 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
100 "InputOffsetsViewType must be a Kokkos::View.");
101 static_assert (std::is_integral<input_offset_type>::value,
102 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
103 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
104 "InputLocalRowIndicesViewType must be a Kokkos::View.");
105 static_assert (std::is_integral<local_row_index_type>::value,
106 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
107
108 NumPacketsAndOffsetsFunctor(const OutputOffsetsViewType& outputOffsets,
109 const CountsViewType& counts,
110 const InputOffsetsViewType& rowOffsets,
111 const InputLocalRowIndicesViewType& lclRowInds,
112 const InputLocalRowPidsViewType& lclRowPids) :
113 outputOffsets_ (outputOffsets),
114 counts_ (counts),
115 rowOffsets_ (rowOffsets),
116 lclRowInds_ (lclRowInds),
117 lclRowPids_ (lclRowPids),
118 error_ ("error") // don't forget this, or you'll get segfaults!
119 {
120 if (debug) {
121 const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
122
123 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
124 std::ostringstream os;
125 os << "lclRowInds.extent(0) = " << numRowsToPack
126 << " != counts.extent(0) = " << counts_.extent (0)
127 << ".";
128 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
129 }
130 if (static_cast<size_t> (numRowsToPack + 1) !=
131 static_cast<size_t> (outputOffsets_.extent (0))) {
132 std::ostringstream os;
133 os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
134 << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
135 << ".";
136 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
137 }
138 }
139 }
140
141 KOKKOS_INLINE_FUNCTION void
142 operator() (const local_row_index_type& curInd,
143 output_offset_type& update,
144 const bool final) const
145 {
146 if (debug) {
147 if (curInd < static_cast<local_row_index_type> (0)) {
148 error_ () = 1;
149 return;
150 }
151 }
152
153 if (final) {
154 if (debug) {
155 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
156 error_ () = 2;
157 return;
158 }
159 }
160 outputOffsets_(curInd) = update;
161 }
162
163 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
164 const auto lclRow = lclRowInds_(curInd);
165 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
166 static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
167 error_ () = 3;
168 return;
169 }
170 // count_type could differ from the type of each row offset.
171 // For example, row offsets might each be 64 bits, but if their
172 // difference always fits in 32 bits, we may then safely use a
173 // 32-bit count_type.
174 const count_type count =
175 static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
176
177 // We pack first the global column indices and then pids (if any),
178 // However, if the number of entries in the row is zero, we pack nothing.
179 const count_type numEntToPack = (count == 0)
180 ? static_cast<count_type>(0)
181 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
182
183 if (final) {
184 counts_(curInd) = numEntToPack;
185 }
186 update += numEntToPack;
187 }
188 }
189
190 // mfh 31 May 2017: Don't need init or join. If you have join, MUST
191 // have join both with and without volatile! Otherwise intrawarp
192 // joins are really slow on GPUs.
193
195 int getError () const {
196 auto error_h = Kokkos::create_mirror_view (error_);
197 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
198 // Note: In the UVM case, this would otherwise be a no-op
199 // and thus not fence, so the value might not be correct on return
200 // In the non-UVM case, create_mirror_view will block for the allocation
201 Kokkos::deep_copy (error_h, error_);
202
203 return error_h ();
204 }
205
206private:
207 OutputOffsetsViewType outputOffsets_;
208 CountsViewType counts_;
209 typename InputOffsetsViewType::const_type rowOffsets_;
210 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
211 typename InputLocalRowPidsViewType::const_type lclRowPids_;
212 Kokkos::View<int, device_type> error_;
213};
214
224template<class OutputOffsetsViewType,
225 class CountsViewType,
226 class InputOffsetsViewType,
227 class InputLocalRowIndicesViewType,
228 class InputLocalRowPidsViewType>
229typename CountsViewType::non_const_value_type
230computeNumPacketsAndOffsets(const OutputOffsetsViewType& outputOffsets,
231 const CountsViewType& counts,
232 const InputOffsetsViewType& rowOffsets,
233 const InputLocalRowIndicesViewType& lclRowInds,
234 const InputLocalRowPidsViewType& lclRowPids)
235{
236 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
237 CountsViewType, typename InputOffsetsViewType::const_type,
238 typename InputLocalRowIndicesViewType::const_type,
239 typename InputLocalRowPidsViewType::const_type> functor_type;
240 typedef typename CountsViewType::non_const_value_type count_type;
241 typedef typename OutputOffsetsViewType::size_type size_type;
242 typedef typename OutputOffsetsViewType::execution_space execution_space;
243 typedef typename functor_type::local_row_index_type LO;
244 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
245 const char prefix[] = "computeNumPacketsAndOffsets: ";
246
247 count_type count = 0;
248 const count_type numRowsToPack = lclRowInds.extent (0);
249
250 if (numRowsToPack == 0) {
251 return count;
252 }
253 else {
254 TEUCHOS_TEST_FOR_EXCEPTION
255 (rowOffsets.extent (0) <= static_cast<size_type> (1),
256 std::invalid_argument, prefix << "There is at least one row to pack, "
257 "but the graph has no rows. lclRowInds.extent(0) = " <<
258 numRowsToPack << ", but rowOffsets.extent(0) = " <<
259 rowOffsets.extent (0) << " <= 1.");
260 TEUCHOS_TEST_FOR_EXCEPTION
261 (outputOffsets.extent (0) !=
262 static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
263 prefix << "Output dimension does not match number of rows to pack. "
264 << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
265 << " != lclRowInds.extent(0) + 1 = "
266 << static_cast<size_type> (numRowsToPack + 1) << ".");
267 TEUCHOS_TEST_FOR_EXCEPTION
268 (counts.extent (0) != numRowsToPack, std::invalid_argument,
269 prefix << "counts.extent(0) = " << counts.extent (0)
270 << " != numRowsToPack = " << numRowsToPack << ".");
271
272 functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
273 Kokkos::parallel_scan ("Tpetra::Details::computeNumPacketsAndOffsets::scan", range_type (0, numRowsToPack + 1), f);
274
275 // At least in debug mode, this functor checks for errors.
276 const int errCode = f.getError ();
277 TEUCHOS_TEST_FOR_EXCEPTION
278 (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
279 << errCode << " != 0.");
280
281#if 0
282 size_t total = 0;
283 for (LO k = 0; k < numRowsToPack; ++k) {
284 total += counts[k];
285 }
286 if (outputOffsets(numRowsToPack) != total) {
287 if (errStr.get () == NULL) {
288 errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
289 }
290 std::ostringstream& os = *errStr;
291 os << prefix
292 << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
293 << outputOffsets(numRowsToPack) << " != sum of counts = "
294 << total << "." << std::endl;
295 if (numRowsToPack != 0) {
296 // Only print the array if it's not too long.
297 if (numRowsToPack < static_cast<LO> (10)) {
298 os << "outputOffsets: [";
299 for (LO i = 0; i <= numRowsToPack; ++i) {
300 os << outputOffsets(i);
301 if (static_cast<LO> (i + 1) <= numRowsToPack) {
302 os << ",";
303 }
304 }
305 os << "]" << std::endl;
306 os << "counts: [";
307 for (LO i = 0; i < numRowsToPack; ++i) {
308 os << counts(i);
309 if (static_cast<LO> (i + 1) < numRowsToPack) {
310 os << ",";
311 }
312 }
313 os << "]" << std::endl;
314 }
315 else {
316 os << "outputOffsets(" << (numRowsToPack-1) << ") = "
317 << outputOffsets(numRowsToPack-1) << "." << std::endl;
318 }
319 }
320 count = outputOffsets(numRowsToPack);
321 return {false, errStr};
322 }
323#endif // HAVE_TPETRA_DEBUG
324
325 // Get last entry of outputOffsets, which is the sum of the entries
326 // of counts. Don't assume UVM.
327 using Tpetra::Details::getEntryOnHost;
328 return static_cast<count_type> (getEntryOnHost (outputOffsets,
329 numRowsToPack));
330 }
331}
332
343template<class Packet,
344 class LocalMapType,
345 class BufferDeviceType,
346 class InputLidsType,
347 class InputPidsType>
348KOKKOS_FUNCTION
349size_t
350packRow(const LocalMapType& col_map,
351 const Kokkos::View<Packet*, BufferDeviceType>& exports,
352 const InputLidsType& lids_in,
353 const InputPidsType& pids_in,
354 const size_t offset,
355 const size_t num_ent,
356 const bool pack_pids)
357{
358 using LO = typename LocalMapType::local_ordinal_type;
359 using GO = typename LocalMapType::global_ordinal_type;
360
361 if (num_ent == 0) {
362 // Empty rows always take zero bytes, to ensure sparsity.
363 return static_cast<size_t>(0);
364 }
365
366 size_t num_ent_packed = num_ent;
367 if (pack_pids) {
368 num_ent_packed += num_ent;
369 }
370
371 // Copy column indices one at a time, so that we don't need
372 // temporary storage.
373 for (size_t k = 0; k < num_ent; ++k) {
374 const LO lid = lids_in[k];
375 const GO gid = col_map.getGlobalElement (lid);
376 exports(offset+k) = gid;
377 }
378 // Copy PIDs one at a time, so that we don't need temporary storage.
379 if (pack_pids) {
380 for (size_t k = 0; k < num_ent; ++k) {
381 const LO lid = lids_in[k];
382 const int pid = pids_in[lid];
383 exports(offset+num_ent+k) = static_cast<GO>(pid);
384 }
385 }
386
387 return num_ent_packed;
388}
389
390template<class Packet,
391 class LocalGraph,
392 class LocalMap,
393 class BufferDeviceType>
394struct PackCrsGraphFunctor {
395 using local_graph_type = LocalGraph;
396 using local_map_type = LocalMap;
397 using LO = typename local_map_type::local_ordinal_type;
398 using GO = typename local_map_type::global_ordinal_type;
399
400 using num_packets_per_lid_view_type =
401 Kokkos::View<const size_t*, BufferDeviceType>;
402 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
403 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
404 using export_lids_view_type =
406 using source_pids_view_type =
408
409 using count_type =
410 typename num_packets_per_lid_view_type::non_const_value_type;
411 using offset_type = typename offsets_view_type::non_const_value_type;
412 using value_type = Kokkos::pair<int, LO>;
413
414 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
415 "local_map_type::local_ordinal_type and "
416 "local_graph_type::data_type must be the same.");
417
418 local_graph_type local_graph;
419 local_map_type local_col_map;
420 exports_view_type exports;
421 num_packets_per_lid_view_type num_packets_per_lid;
422 export_lids_view_type export_lids;
423 source_pids_view_type source_pids;
424 offsets_view_type offsets;
425 bool pack_pids;
426
427 PackCrsGraphFunctor(const local_graph_type& local_graph_in,
428 const local_map_type& local_col_map_in,
429 const exports_view_type& exports_in,
430 const num_packets_per_lid_view_type& num_packets_per_lid_in,
431 const export_lids_view_type& export_lids_in,
432 const source_pids_view_type& source_pids_in,
433 const offsets_view_type& offsets_in,
434 const bool pack_pids_in) :
435 local_graph (local_graph_in),
436 local_col_map (local_col_map_in),
437 exports (exports_in),
438 num_packets_per_lid (num_packets_per_lid_in),
439 export_lids (export_lids_in),
440 source_pids (source_pids_in),
441 offsets (offsets_in),
442 pack_pids (pack_pids_in)
443 {
444 const LO numRows = local_graph_in.numRows ();
445 const LO rowMapDim =
446 static_cast<LO> (local_graph.row_map.extent (0));
447 TEUCHOS_TEST_FOR_EXCEPTION
448 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
449 std::logic_error, "local_graph.row_map.extent(0) = "
450 << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
451 }
452
453 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
454 {
455 using ::Tpetra::Details::OrdinalTraits;
456 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
457 }
458
459 KOKKOS_INLINE_FUNCTION void
460 join (value_type& dst, const value_type& src) const
461 {
462 // `dst` should reflect the first (least) bad index and all other
463 // associated error codes and data, so prefer keeping it.
464 if (src.first != 0 && dst.first == 0) {
465 dst = src;
466 }
467 }
468
469 KOKKOS_INLINE_FUNCTION
470 void operator() (const LO i, value_type& dst) const
471 {
472 const size_t offset = offsets[i];
473 const LO export_lid = export_lids[i];
474 const size_t buf_size = exports.size();
475 const size_t num_packets_this_lid = num_packets_per_lid(i);
476 const size_t num_ent =
477 static_cast<size_t> (local_graph.row_map[export_lid+1]
478 - local_graph.row_map[export_lid]);
479
480 // Only pack this row's data if it has a nonzero number of
481 // entries. We can do this because receiving processes get the
482 // number of packets, and will know that zero packets means zero
483 // entries.
484 if (num_ent == 0) {
485 return;
486 }
487
488 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
489 if (dst.first != 0) { // keep only the first error
490 dst = Kokkos::make_pair (1, i); // invalid row
491 }
492 return;
493 }
494 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
495 if (dst.first != 0) { // keep only the first error
496 dst = Kokkos::make_pair (2, i); // out of bounds
497 }
498 return;
499 }
500
501 // We can now pack this row
502
503 // Since the graph is locally indexed on the calling process, we
504 // have to use its column Map (which it _must_ have in this case)
505 // to convert to global indices.
506 const auto row_beg = local_graph.row_map[export_lid];
507 const auto row_end = local_graph.row_map[export_lid + 1];
508 auto lids_in = Kokkos::subview (local_graph.entries,
509 Kokkos::make_pair (row_beg, row_end));
510 size_t num_ent_packed_this_row =
511 packRow (local_col_map, exports, lids_in,
512 source_pids, offset, num_ent, pack_pids);
513 if (num_ent_packed_this_row != num_packets_this_lid) {
514 if (dst.first != 0) { // keep only the first error
515 dst = Kokkos::make_pair (3, i);
516 }
517 }
518 }
519};
520
528template<class Packet,
529 class LocalGraph,
530 class LocalMap,
531 class BufferDeviceType>
532void
533do_pack(const LocalGraph& local_graph,
534 const LocalMap& local_map,
535 const Kokkos::View<Packet*, BufferDeviceType>& exports,
536 const typename PackTraits<
537 size_t
538 >::input_array_type& num_packets_per_lid,
539 const typename PackTraits<
541 >::input_array_type& export_lids,
542 const typename PackTraits<
543 int
544 >::input_array_type& source_pids,
545 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
546 const bool pack_pids)
547{
548 using LO = typename LocalMap::local_ordinal_type;
549 using execution_space = typename LocalGraph::device_type::execution_space;
550 using range_type = Kokkos::RangePolicy<execution_space, LO>;
551 const char prefix[] = "Tpetra::Details::PackCrsGraphImpl::do_pack: ";
552
553 if (export_lids.extent (0) != 0) {
554 TEUCHOS_TEST_FOR_EXCEPTION
555 (static_cast<size_t> (offsets.extent (0)) !=
556 static_cast<size_t> (export_lids.extent (0) + 1),
557 std::invalid_argument, prefix << "offsets.extent(0) = "
558 << offsets.extent (0) << " != export_lids.extent(0) (= "
559 << export_lids.extent (0) << ") + 1.");
560 TEUCHOS_TEST_FOR_EXCEPTION
561 (export_lids.extent (0) != num_packets_per_lid.extent (0),
562 std::invalid_argument, prefix << "export_lids.extent(0) = " <<
563 export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
564 << num_packets_per_lid.extent (0) << ".");
565 // If exports has nonzero length at this point, then the graph
566 // has at least one entry to pack. Thus, if packing process
567 // ranks, we had better have at least one process rank to pack.
568 TEUCHOS_TEST_FOR_EXCEPTION
569 (pack_pids && exports.extent (0) != 0 &&
570 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
571 "pack_pids is true, and exports.extent(0) = " <<
572 exports.extent (0) << " != 0, meaning that we need to pack at "
573 "least one graph entry, but source_pids.extent(0) = 0.");
574 }
575
576 using pack_functor_type =
577 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
578 BufferDeviceType>;
579 pack_functor_type f (local_graph, local_map, exports,
580 num_packets_per_lid, export_lids,
581 source_pids, offsets, pack_pids);
582
583 typename pack_functor_type::value_type result;
584 range_type range (0, num_packets_per_lid.extent (0));
585 Kokkos::parallel_reduce ("Tpetra::Details::computeNumPacketsAndOffsets::reduce",range, f, result);
586
587 if (result.first != 0) {
588 // We can't deep_copy from AnonymousSpace Views, so we can't
589 // print out any information from them in case of error.
590 std::ostringstream os;
591 if (result.first == 1) { // invalid local row index
592 os << "invalid local row index";
593 }
594 else if (result.first == 2) { // invalid offset
595 os << "invalid offset";
596 }
597 TEUCHOS_TEST_FOR_EXCEPTION
598 (true, std::runtime_error, prefix << "PackCrsGraphFunctor "
599 "reported error code " << result.first << " (" << os.str ()
600 << ") for the first bad row " << result.second << ".");
601 }
602}
603
630template<typename LO, typename GO, typename NT>
631void
633(const CrsGraph<LO,GO,NT>& sourceGraph,
634 Kokkos::DualView<
637 >& exports,
638 const Kokkos::View<
639 size_t*,
641 >& num_packets_per_lid,
642 const Kokkos::View<
643 const LO*,
645 >& export_lids,
646 const Kokkos::View<
647 const int*,
649 >& export_pids,
650 size_t& constant_num_packets,
651 const bool pack_pids)
652{
653 using Kokkos::View;
655 using packet_type = typename crs_graph_type::packet_type;
656 using buffer_device_type = typename crs_graph_type::buffer_device_type;
657 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
658 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
659 using local_map_type = typename Tpetra::Map<LO, GO, NT>::local_map_type;
660 const char prefix[] = "Tpetra::Details::packCrsGraph: ";
661 constexpr bool debug = false;
662
663 local_graph_device_type local_graph = sourceGraph.getLocalGraphDevice ();
664 local_map_type local_col_map = sourceGraph.getColMap ()->getLocalMap ();
665
666 // Setting this to zero tells the caller to expect a possibly
667 // different ("nonconstant") number of packets per local index
668 // (i.e., a possibly different number of entries per row).
669 constant_num_packets = 0;
670
671 const size_t num_export_lids (export_lids.extent (0));
672 TEUCHOS_TEST_FOR_EXCEPTION
673 (num_export_lids != size_t (num_packets_per_lid.extent (0)),
674 std::invalid_argument, prefix << "num_export_lids.extent(0) = "
675 << num_export_lids << " != num_packets_per_lid.extent(0) = "
676 << num_packets_per_lid.extent (0) << ".");
677 if (num_export_lids != 0) {
678 TEUCHOS_TEST_FOR_EXCEPTION
679 (num_packets_per_lid.data () == nullptr, std::invalid_argument,
680 prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
681 "num_packets_per_lid.data() = "
682 << num_packets_per_lid.data () << " == NULL.");
683 }
684
685 if (num_export_lids == 0) {
686 exports = exports_view_type ("exports", 0);
687 return;
688 }
689
690 // Array of offsets into the pack buffer.
691 View<size_t*, buffer_device_type> offsets ("offsets", num_export_lids + 1);
692
693 // Compute number of packets per LID (row to send), as well as
694 // corresponding offsets (the prefix sum of the packet counts).
695 const size_t count =
696 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
697 local_graph.row_map, export_lids, export_pids);
698
699 // Resize the output pack buffer if needed.
700 if (count > size_t (exports.extent (0))) {
701 exports = exports_view_type ("exports", count);
702 if (debug) {
703 std::ostringstream os;
704 os << "*** exports resized to " << count << std::endl;
705 std::cerr << os.str ();
706 }
707 }
708 if (debug) {
709 std::ostringstream os;
710 os << "*** count: " << count << ", exports.extent(0): "
711 << exports.extent (0) << std::endl;
712 std::cerr << os.str ();
713 }
714
715 // If exports has nonzero length at this point, then the graph has
716 // at least one entry to pack. Thus, if packing process ranks, we
717 // had better have at least one process rank to pack.
718 TEUCHOS_TEST_FOR_EXCEPTION
719 (pack_pids && exports.extent (0) != 0 &&
720 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
721 "pack_pids is true, and exports.extent(0) = " <<
722 exports.extent (0) << " != 0, meaning that we need to pack at least "
723 "one graph entry, but export_pids.extent(0) = 0.");
724
725 exports.modify_device ();
726 auto exports_d = exports.view_device ();
728 (local_graph, local_col_map, exports_d, num_packets_per_lid,
729 export_lids, export_pids, offsets, pack_pids);
730 // If we got this far, we succeeded.
731}
732
733} // namespace PackCrsGraphImpl
734
735template<typename LO, typename GO, typename NT>
736void
738 Teuchos::Array<typename CrsGraph<LO,GO,NT>::packet_type>& exports,
739 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
740 const Teuchos::ArrayView<const LO>& exportLIDs,
741 size_t& constantNumPackets)
742{
743 using Kokkos::HostSpace;
744 using Kokkos::MemoryUnmanaged;
745 using Kokkos::View;
747 using packet_type = typename crs_graph_type::packet_type;
748 using BDT = typename crs_graph_type::buffer_device_type;
749
750 // Convert all Teuchos::Array to Kokkos::View
751
752 // This is an output array, so we don't have to copy to device here.
753 // However, we'll have to remember to copy back to host when done.
754 BDT outputDevice;
755 View<size_t*, BDT> num_packets_per_lid_d =
757 numPacketsPerLID.getRawPtr (),
758 numPacketsPerLID.size (), false,
759 "num_packets_per_lid");
760 // This is an input array, so we have to copy to device here.
761 // However, we never need to copy it back to host.
762 View<const LO*, BDT> export_lids_d =
764 exportLIDs.getRawPtr (),
765 exportLIDs.size (), true,
766 "export_lids");
767 View<const int*, BDT> export_pids_d;
768 Kokkos::DualView<packet_type*, BDT> exports_dv;
769 constexpr bool pack_pids = false;
770
771 static_assert
772 (std::is_same<
773 typename decltype (num_packets_per_lid_d)::non_const_value_type,
774 size_t>::value,
775 "num_packets_per_lid_d's non_const_value_type should be size_t.");
776 static_assert
777 (std::is_same<
778 typename decltype (num_packets_per_lid_d)::device_type,
779 BDT>::value,
780 "num_packets_per_lid_d's BDT should be size_t.");
781 static_assert
782 (std::is_same<
783 typename decltype (export_lids_d)::device_type,
784 BDT>::value,
785 "export_lids_d's device_type should be BDT.");
786 static_assert
787 (std::is_same<
788 typename decltype (export_pids_d)::non_const_value_type,
789 int>::value,
790 "export_pids_d's non_const_value_type should be int.");
791 static_assert
792 (std::is_same<
793 typename decltype (export_pids_d)::device_type,
794 BDT>::value,
795 "export_pids_d's device_type should be BDT.");
796
798 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
799 export_pids_d, constantNumPackets, pack_pids);
800
801 // The counts are an output of packCrsGraph, so we have to copy
802 // them back to host.
803 View<size_t*, HostSpace, MemoryUnmanaged>
804 num_packets_per_lid_h (numPacketsPerLID.getRawPtr (),
805 numPacketsPerLID.size ());
806
807 // DEEP_COPY REVIEW - DEVICE-TO-HOST
808 using execution_space = typename BDT::execution_space;
809 Kokkos::deep_copy (execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
810
811 // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
812 // exports_dv above, then we have two host copies for exports_h.
813
814 // The exports are an output of packCrsGraph, so we have to
815 // copy them back to host.
816 if (static_cast<size_t> (exports.size ()) !=
817 static_cast<size_t> (exports_dv.extent (0))) {
818 exports.resize (exports_dv.extent (0));
819 }
820 View<packet_type*, HostSpace, MemoryUnmanaged>
821 exports_h (exports.getRawPtr (), exports.size ());
822 // DEEP_COPY REVIEW - DEVICE-TO-HOST
823 Kokkos::deep_copy (execution_space(), exports_h, exports_dv.view_device());
824 execution_space().fence();
825}
826
829template<typename LO, typename GO, typename NT>
830void
832 const Kokkos::DualView<
833 const LO*,
835 >& export_lids,
836 const Kokkos::DualView<
837 const int*,
839 >& export_pids,
840 Kokkos::DualView<
843 Kokkos::DualView<
844 size_t*,
846 > num_packets_per_lid,
847 size_t& constant_num_packets,
848 const bool pack_pids)
849{
850 using Kokkos::View;
852 using BDT = typename crs_graph_type::buffer_device_type;
853 using PT = typename crs_graph_type::packet_type;
854 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
855 using LGT = typename crs_graph_type::local_graph_device_type;
856 using LMT = typename crs_graph_type::map_type::local_map_type;
857 const char prefix[] = "Tpetra::Details::packCrsGraphNew: ";
858
859 const LGT local_graph = sourceGraph.getLocalGraphDevice ();
860 const LMT local_col_map = sourceGraph.getColMap ()->getLocalMap ();
861
862 // Setting this to zero tells the caller to expect a possibly
863 // different ("nonconstant") number of packets per local index
864 // (i.e., a possibly different number of entries per row).
865 constant_num_packets = 0;
866
867 const size_t num_export_lids =
868 static_cast<size_t> (export_lids.extent (0));
869 TEUCHOS_TEST_FOR_EXCEPTION
870 (num_export_lids !=
871 static_cast<size_t> (num_packets_per_lid.extent (0)),
872 std::invalid_argument, prefix << "num_export_lids.extent(0) = "
873 << num_export_lids << " != num_packets_per_lid.extent(0) = "
874 << num_packets_per_lid.extent (0) << ".");
875 TEUCHOS_TEST_FOR_EXCEPTION
876 (num_export_lids != 0 &&
877 num_packets_per_lid.view_device ().data () == nullptr,
878 std::invalid_argument, prefix << "num_export_lids = "<< num_export_lids
879 << " != 0, but num_packets_per_lid.view_device().data() = nullptr.");
880
881 if (num_export_lids == 0) {
882 exports = exports_dual_view_type ();
883 return;
884 }
885
886 // Array of offsets into the pack buffer.
887 using offsets_type = Kokkos::View<size_t*, BDT>;
888 offsets_type offsets ("offsets", num_export_lids + 1);
889
890 // Compute number of packets per LID (row to send), as well as
891 // corresponding offsets (the prefix sum of the packet counts).
892 num_packets_per_lid.clear_sync_state ();
893 num_packets_per_lid.modify_device ();
895 const size_t count =
896 computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
897 local_graph.row_map,
898 export_lids.view_device (),
899 export_pids.view_device ());
900
901 // Resize the output pack buffer if needed.
902 if (count > static_cast<size_t> (exports.extent (0))) {
903 exports = exports_dual_view_type ("exports", count);
904 }
905
906 // If exports has nonzero length at this point, then the graph has
907 // at least one entry to pack. Thus, if packing process ranks, we
908 // had better have at least one process rank to pack.
909 TEUCHOS_TEST_FOR_EXCEPTION
910 (pack_pids && exports.extent (0) != 0 &&
911 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
912 "pack_pids is true, and exports.extent(0) = " <<
913 exports.extent (0) << " != 0, meaning that we need to pack at least "
914 "one graph entry, but export_pids.extent(0) = 0.");
915
916 exports.modify_device ();
918 do_pack<PT, LGT, LMT, BDT> (local_graph, local_col_map,
919 exports.view_device (),
920 num_packets_per_lid.view_device (),
921 export_lids.view_device (),
922 export_pids.view_device (),
923 offsets, pack_pids);
924}
925
926template<typename LO, typename GO, typename NT>
927void
929(const CrsGraph<LO, GO, NT>& sourceGraph,
930 Kokkos::DualView<
933 >& exports_dv,
934 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
935 const Teuchos::ArrayView<const LO>& exportLIDs,
936 const Teuchos::ArrayView<const int>& sourcePIDs,
937 size_t& constantNumPackets)
938{
939 using Kokkos::HostSpace;
940 using Kokkos::MemoryUnmanaged;
941 using Kokkos::View;
943 using buffer_device_type = typename crs_graph_type::buffer_device_type;
944
945 // Convert all Teuchos::Array to Kokkos::View
946
947 // This is an output array, so we don't have to copy to device here.
948 // However, we'll have to remember to copy back to host when done.
949 View<size_t*, buffer_device_type> num_packets_per_lid_d =
951 numPacketsPerLID.getRawPtr (),
952 numPacketsPerLID.size (), false,
953 "num_packets_per_lid");
954
955 // This is an input array, so we have to copy to device here.
956 // However, we never need to copy it back to host.
957 View<const LO*, buffer_device_type> export_lids_d =
959 exportLIDs.getRawPtr (),
960 exportLIDs.size (), true,
961 "export_lids");
962 // This is an input array, so we have to copy to device here.
963 // However, we never need to copy it back to host.
964 View<const int*, buffer_device_type> export_pids_d =
966 sourcePIDs.getRawPtr (),
967 sourcePIDs.size (), true,
968 "export_pids");
969 constexpr bool pack_pids = true;
971 (sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
972 export_pids_d, constantNumPackets, pack_pids);
973
974 // The counts are an output of packCrsGraph, so we
975 // have to copy them back to host.
976 View<size_t*, HostSpace, MemoryUnmanaged> num_packets_per_lid_h
977 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
978 // DEEP_COPY REVIEW - DEVICE-TO-HOST
979 using execution_space = typename buffer_device_type::execution_space;
980 Kokkos::deep_copy (execution_space(),
981 num_packets_per_lid_h, num_packets_per_lid_d);
982 execution_space().fence();
983}
984
985} // namespace Details
986} // namespace Tpetra
987
// Explicit instantiation of Details::packCrsGraph,
// Details::packCrsGraphNew and Details::packCrsGraphWithOwningPIDs
// for one (LO, GO, NT) combination.
// NOTE(review): the expansion refers to Details:: and CrsGraph
// without further qualification, so it presumably has to be used
// inside namespace Tpetra -- confirm at the point of use.
#define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
  template void Details::packCrsGraph<LO, GO, NT> ( \
    const CrsGraph<LO, GO, NT>&, \
    Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    size_t&); \
  template void Details::packCrsGraphNew<LO, GO, NT> ( \
    const CrsGraph<LO, GO, NT>&, \
    const Kokkos::DualView<const LO*, \
                           CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    const Kokkos::DualView<const int*, \
                           CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, \
                     CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    Kokkos::DualView<size_t*, \
                     CrsGraph<LO,GO,NT>::buffer_device_type>, \
    size_t&, \
    const bool); \
  template void Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
    const CrsGraph<LO, GO, NT>&, \
    Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, \
                     CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    const Teuchos::ArrayView<const int>&, \
    size_t&);
1021
1022#endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
Declaration of the Tpetra::CrsGraph class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
CountsViewType::non_const_value_type computeNumPacketsAndOffsets(const OutputOffsetsViewType &outputOffsets, const CountsViewType &counts, const InputOffsetsViewType &rowOffsets, const InputLocalRowIndicesViewType &lclRowInds, const InputLocalRowPidsViewType &lclRowPids)
Compute the number of packets and offsets for the pack procedure.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, const Kokkos::View< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &num_packets_per_lid, const Kokkos::View< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &export_lids, const Kokkos::View< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &export_pids, size_t &constant_num_packets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication.
void do_pack(const LocalGraph &local_graph, const LocalMap &local_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const typename PackTraits< size_t >::input_array_type &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type &export_lids, const typename PackTraits< int >::input_array_type &source_pids, const Kokkos::View< const size_t *, BufferDeviceType > &offsets, const bool pack_pids)
Perform the pack operation for the graph.
KOKKOS_FUNCTION size_t packRow(const LocalMapType &col_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const InputLidsType &lids_in, const InputPidsType &pids_in, const size_t offset, const size_t num_ent, const bool pack_pids)
Packs a single row of the CrsGraph.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
global_ordinal_type packet_type
Type of each entry of the DistObject communication buffer.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
::Tpetra::Details::LocalMap< local_ordinal_type, global_ordinal_type, device_type > local_map_type
Type of the "local" Map.
Nonmember function that computes a residual: R = B - A * X.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.