Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_FixedHashTable_def.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
11#define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
12
13#include "Tpetra_Details_FixedHashTable_decl.hpp"
15#ifdef TPETRA_USE_MURMUR_HASH
16# include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
17#endif // TPETRA_USE_MURMUR_HASH
18#include "Kokkos_ArithTraits.hpp"
19#include "Teuchos_TypeNameTraits.hpp"
21#include <type_traits>
22
23namespace Tpetra {
24namespace Details {
25//
26// This namespace stores utility functions and Kokkos
27// functors for use in FixedHashTable construction.
28//
29namespace FHT {
30
31
32// Is it worth actually using building the FixedHashTable using
33// parallel threads, instead of just counting in a sequential loop?
34//
35// The parallel version of FixedHashTable construction isn't just a
36// parallelization of the sequential loops. It incurs additional
37// overheads. For example, the CountBuckets kernel uses atomic update
38// instructions to count the number of "buckets" per offsets array
39// (ptr) entry. Atomic updates have overhead, even if only one thread
40// issues them. The Kokkos kernels are still correct in that case,
41// but I would rather not incur overhead then. It might make sense to
42// set the minimum number of threads to something greater than 1, but
43// we would need experiments to find out.
44template<class ExecSpace>
45bool worthBuildingFixedHashTableInParallel () {
46 return ExecSpace().concurrency() > 1;
47}
48
49//
50// Functors for FixedHashTable initialization
51//
52// NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
53// should consider replacing all of these functors with in-line
54// lambdas. The only issue is that we would need to bake the
55// execution space into the policy, since the default execution space
56// might differ from the one Tpetra wants to use.
57
67template<class CountsViewType,
68 class KeysViewType,
69 class SizeType = typename KeysViewType::size_type>
71public:
72 typedef CountsViewType counts_view_type;
73 typedef KeysViewType keys_view_type;
74 typedef typename CountsViewType::execution_space execution_space;
75 typedef typename CountsViewType::memory_space memory_space;
76 typedef SizeType size_type;
77 typedef typename keys_view_type::non_const_value_type key_type;
78 // mfh 21 May 2015: Having a device_type typedef in the functor
79 // along with an execution_space typedef causes compilation issues.
80 // This is because one of Kokkos' partial specializations picks up
81 // on the device_type typedef, and another picks up on the
82 // execution_space typedef. The former is a legacy of a previous
83 // design iteration of Kokkos, which did not separate memory and
84 // execution spaces.
86
92 CountBuckets (const counts_view_type& counts,
93 const keys_view_type& keys,
94 const size_type size) :
95 counts_ (counts),
96 keys_ (keys),
97 size_ (size)
98 {}
99
104 KOKKOS_INLINE_FUNCTION void
105 operator () (const size_type& i) const
106 {
107 typedef typename hash_type::result_type hash_value_type;
108
109 const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
110 Kokkos::atomic_inc (&counts_[hashVal]);
111 }
112
113 using value_type = Kokkos::pair<int, key_type>;
114
118 KOKKOS_INLINE_FUNCTION void
119 operator () (const size_type& i, value_type& dst) const
120 {
121 using hash_value_type = typename hash_type::result_type;
122
123 const key_type keyVal = keys_[i];
124 const hash_value_type hashVal = hash_type::hashFunc (keyVal, size_);
125 if (hashVal < hash_value_type (0) ||
126 hashVal >= hash_value_type (counts_.extent (0))) {
127 dst.first = 1;
128 dst.second = keyVal;
129 }
130 else {
131 Kokkos::atomic_inc (&counts_[hashVal]);
132 }
133 }
134
136 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
137 {
138 dst.first = 0;
139 dst.second = key_type (0);
140 }
141
142 KOKKOS_INLINE_FUNCTION void
143 join (value_type& dst,
144 const value_type& src) const
145 {
146 const bool good = dst.first == 0 || src.first == 0;
147 dst.first = good ? 0 : dst.first;
148 // leave dst.second as it is, to get the "first" key
149 }
150
151private:
153 counts_view_type counts_;
155 keys_view_type keys_;
157 size_type size_;
158};
159
170template<class KeyType>
171struct FillPairsResult {
172 KOKKOS_INLINE_FUNCTION
173 FillPairsResult () :
174 minKey_ (::Kokkos::ArithTraits<KeyType>::max ()),
175 // min() for a floating-point type returns the minimum _positive_
176 // normalized value. This is different than for integer types.
177 // lowest() is new in C++11 and returns the least value, always
178 // negative for signed finite types.
179 //
180 // mfh 23 May 2015: I have heard reports that
181 // std::numeric_limits<int>::lowest() does not exist with the
182 // Intel compiler. I'm not sure if the users in question actually
183 // enabled C++11. However, it's easy enough to work around this
184 // issue. The standard floating-point types are signed and have a
185 // sign bit, so lowest() is just -max(). For integer types, we
186 // can use min() instead.
187 maxKey_ (::Kokkos::ArithTraits<KeyType>::is_integer ?
188 ::Kokkos::ArithTraits<KeyType>::min () :
189 -::Kokkos::ArithTraits<KeyType>::max ()),
190 success_ (true)
191 {
192 static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
193 "KeyType must be some kind of number type.");
194 }
195
196 KOKKOS_INLINE_FUNCTION
197 FillPairsResult (const KeyType& initMinKey,
198 const KeyType& initMaxKey) :
199 minKey_ (initMinKey),
200 maxKey_ (initMaxKey),
201 success_ (true)
202 {
203 static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
204 "KeyType must be some kind of number type.");
205 }
206
207 KeyType minKey_;
208 KeyType maxKey_;
209 bool success_;
210};
211
241template<class PairsViewType,
242 class KeysViewType,
243 class CountsViewType,
244 class SizeType = typename KeysViewType::size_type>
246public:
247 typedef typename CountsViewType::non_const_type counts_view_type;
248 typedef typename counts_view_type::const_type offsets_view_type;
249
250 typedef PairsViewType pairs_view_type;
251 typedef typename KeysViewType::const_type keys_view_type;
252 typedef typename offsets_view_type::execution_space execution_space;
253 typedef typename offsets_view_type::memory_space memory_space;
254 typedef SizeType size_type;
255
256 typedef typename keys_view_type::non_const_value_type key_type;
257 typedef typename pairs_view_type::non_const_value_type pair_type;
258
259 typedef FillPairsResult<key_type> value_type;
260
261 // mfh 23 May 2015: Having a device_type typedef in the functor
262 // along with an execution_space typedef causes compilation issues.
263 // This is because one of Kokkos' partial specializations picks up
264 // on the device_type typedef, and another picks up on the
265 // execution_space typedef. The former is a legacy of a previous
266 // design iteration of Kokkos, which did not separate memory and
267 // execution spaces.
269
280 FillPairs (const pairs_view_type& pairs,
281 const counts_view_type& counts,
282 const offsets_view_type& ptr,
283 const keys_view_type& keys,
284 const typename pair_type::second_type startingValue) :
285 pairs_ (pairs),
286 counts_ (counts),
287 ptr_ (ptr),
288 keys_ (keys),
289 size_ (counts.extent (0)),
290 startingValue_ (startingValue),
291 initMinKey_ (::Kokkos::ArithTraits<key_type>::max ()),
292 initMaxKey_ (::Kokkos::ArithTraits<key_type>::is_integer ?
293 ::Kokkos::ArithTraits<key_type>::min () :
294 -::Kokkos::ArithTraits<key_type>::max ())
295 {}
296
315 FillPairs (const pairs_view_type& pairs,
316 const counts_view_type& counts,
317 const offsets_view_type& ptr,
318 const keys_view_type& keys,
319 const typename pair_type::second_type startingValue,
320 const key_type initMinKey,
321 const key_type initMaxKey) :
322 pairs_ (pairs),
323 counts_ (counts),
324 ptr_ (ptr),
325 keys_ (keys),
326 size_ (counts.extent (0)),
327 startingValue_ (startingValue),
328 initMinKey_ (initMinKey),
329 initMaxKey_ (initMaxKey)
330 {}
331
333 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
334 {
335 dst.minKey_ = initMinKey_;
336 dst.maxKey_ = initMaxKey_;
337 dst.success_ = true;
338 }
339
340 KOKKOS_INLINE_FUNCTION void
341 join (value_type& dst,
342 const value_type& src) const
343 {
344 if (src.maxKey_ > dst.maxKey_) {
345 dst.maxKey_ = src.maxKey_;
346 }
347 if (src.minKey_ < dst.minKey_) {
348 dst.minKey_ = src.minKey_;
349 }
350 dst.success_ = dst.success_ && src.success_;
351 }
352
357 KOKKOS_INLINE_FUNCTION void
358 operator () (const size_type& i, value_type& dst) const
359 {
360 typedef typename hash_type::result_type hash_value_type;
361 typedef typename offsets_view_type::non_const_value_type offset_type;
362 typedef typename pair_type::second_type val_type;
363 typedef typename std::remove_reference< decltype( counts_[0] ) >::type atomic_incr_type;
364
365 const key_type key = keys_[i];
366 if (key > dst.maxKey_) {
367 dst.maxKey_ = key;
368 }
369 if (key < dst.minKey_) {
370 dst.minKey_ = key;
372 const val_type theVal = startingValue_ + static_cast<val_type> (i);
373 const hash_value_type hashVal = hash_type::hashFunc (key, size_);
374
375 // Return the old count; decrement afterwards.
376 const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], atomic_incr_type(-1));
377 if (count == 0) {
378 dst.success_ = false; // FAILURE!
379 }
380 else {
381 const offset_type curPos = ptr_[hashVal+1] - count;
382
383 pairs_[curPos].first = key;
384 pairs_[curPos].second = theVal;
385 }
386 }
387
388private:
389 pairs_view_type pairs_;
390 counts_view_type counts_;
391 offsets_view_type ptr_;
392 keys_view_type keys_;
393 size_type size_;
394 typename pair_type::second_type startingValue_;
396 key_type initMinKey_;
398 key_type initMaxKey_;
399};
400
424template<class OffsetsViewType,
425 class PairsViewType,
426 class SizeType = typename OffsetsViewType::size_type>
428public:
429 typedef typename OffsetsViewType::const_type offsets_view_type;
430 typedef typename PairsViewType::const_type pairs_view_type;
431 typedef typename offsets_view_type::execution_space execution_space;
432 typedef typename offsets_view_type::memory_space memory_space;
433 typedef SizeType size_type;
434
435 // The result of the check is whether the table has one or more duplicates.
436 typedef int value_type;
437
442 CheckForDuplicateKeys (const pairs_view_type& pairs,
443 const offsets_view_type& ptr) :
444 pairs_ (pairs),
445 ptr_ (ptr),
446 size_ (ptr_.extent (0) == 0 ?
447 size_type (0) :
448 ptr_.extent (0) - 1)
449 {}
450
452 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
453 {
454 dst = 0;
455 }
456
458 KOKKOS_INLINE_FUNCTION void
459 join (value_type& dst,
460 const value_type& src) const
461 {
462 dst = dst + src > 0?1:0;
463 }
464
466 KOKKOS_INLINE_FUNCTION void
467 operator () (const size_type& i, value_type& dst) const
468 {
469 typedef typename offsets_view_type::non_const_value_type offset_type;
470 typedef typename pairs_view_type::non_const_value_type pair_type;
471 typedef typename pair_type::first_type key_type;
472
473 if (dst>0) {
474 return; // we've already found duplicate keys elsewhere
475 }
476 else {
477 const offset_type beg = ptr_[i];
478 const offset_type end = ptr_[i+1];
479 bool foundDuplicateKey = false;
480 // This is an ~ n^2 algorithm in the worst case, where n is the
481 // max number of keys that hash to the same bucket. However, if
482 // the hash function is reasonable, n should be much less than
483 // the total number of keys.
484 for (offset_type j = beg + 1; j < end; ++j) {
485 const key_type curKey = pairs_[j].first;
486 for (offset_type k = beg; k < j; ++k) {
487 if (pairs_[k].first == curKey) {
488 foundDuplicateKey = true;
489 break;
490 }
491 }
492 }
493 dst = (dst>0) || foundDuplicateKey?1:0;
494 }
495 }
496
497private:
498 pairs_view_type pairs_;
499 offsets_view_type ptr_;
500 size_type size_;
501};
502
503} // namespace FHT
504
505//
506// Here begins the actual implementation of FixedHashTable.
507//
508
509template<class KeyType, class ValueType, class DeviceType>
511FixedHashTable (const keys_type& keys) :
512 minVal_ (0),
513 maxVal_ (keys.size () == 0 ?
514 static_cast<ValueType> (0) :
515 static_cast<ValueType> (keys.size () - 1)),
516 checkedForDuplicateKeys_ (false)
517{
518 const ValueType startingValue = static_cast<ValueType> (0);
519 const KeyType initMinKey = this->minKey_;
520 const KeyType initMaxKey = this->maxKey_;
521 this->init (keys, startingValue, initMinKey, initMaxKey,
522 initMinKey, initMinKey, false);
523}
524
525template<class KeyType, class ValueType, class DeviceType>
527FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys) :
528 minVal_ (0),
529 maxVal_ (keys.size () == 0 ?
530 static_cast<ValueType> (0) :
531 static_cast<ValueType> (keys.size () - 1)),
532 checkedForDuplicateKeys_ (false)
533{
534 typedef typename keys_type::non_const_type nonconst_keys_type;
535
536 // mfh 01 May 2015: I don't trust that
537 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
538 // so I ensure this manually.
539 const ValueType startingValue = static_cast<ValueType> (0);
540 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
541 keys.size ());
542 using Kokkos::ViewAllocateWithoutInitializing;
543 nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
544 keys_k.extent (0));
545 // DEEP_COPY REVIEW - NOT TESTED
546 Kokkos::deep_copy (keys_d, keys_k);
547 const KeyType initMinKey = this->minKey_;
548 const KeyType initMaxKey = this->maxKey_;
549 this->init (keys_d, startingValue, initMinKey, initMaxKey,
550 initMinKey, initMinKey, false);
551}
552
553template<class KeyType, class ValueType, class DeviceType>
555FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
556 const ValueType startingValue) :
557 minVal_ (startingValue),
558 maxVal_ (keys.size () == 0 ?
559 startingValue :
560 static_cast<ValueType> (startingValue + keys.size () - 1)),
561 checkedForDuplicateKeys_ (false)
562{
563 typedef typename keys_type::non_const_type nonconst_keys_type;
564
565 // mfh 01 May 2015: I don't trust that
566 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
567 // so I ensure this manually.
568 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
569 keys.size ());
570 using Kokkos::ViewAllocateWithoutInitializing;
571 nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
572 keys_k.extent (0));
573 // DEEP_COPY REVIEW - HOST-TO_DEVICE
574 Kokkos::deep_copy (execution_space(), keys_d, keys_k);
575
576 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
577 // min() for a floating-point type returns the minimum _positive_
578 // normalized value. This is different than for integer types.
579 // lowest() is new in C++11 and returns the least value, always
580 // negative for signed finite types.
581 //
582 // mfh 23 May 2015: I have heard reports that
583 // std::numeric_limits<int>::lowest() does not exist with the Intel
584 // compiler. I'm not sure if the users in question actually enabled
585 // C++11. However, it's easy enough to work around this issue. The
586 // standard floating-point types are signed and have a sign bit, so
587 // lowest() is just -max(). For integer types, we can use min()
588 // instead.
589 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
590 ::Kokkos::ArithTraits<KeyType>::min () :
591 -::Kokkos::ArithTraits<KeyType>::max ();
592 this->init (keys_d, startingValue, initMinKey, initMaxKey,
593 initMinKey, initMinKey, false);
594
595}
596
597template<class KeyType, class ValueType, class DeviceType>
599FixedHashTable (const keys_type& keys,
600 const KeyType firstContigKey,
601 const KeyType lastContigKey,
602 const ValueType startingValue) :
603 minVal_ (startingValue),
604 maxVal_ (keys.size () == 0 ?
605 startingValue :
606 static_cast<ValueType> (startingValue + keys.size () - 1)),
607 firstContigKey_ (firstContigKey),
608 lastContigKey_ (lastContigKey),
609 checkedForDuplicateKeys_ (false)
610{
611 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
612 // min() for a floating-point type returns the minimum _positive_
613 // normalized value. This is different than for integer types.
614 // lowest() is new in C++11 and returns the least value, always
615 // negative for signed finite types.
616 //
617 // mfh 23 May 2015: I have heard reports that
618 // std::numeric_limits<int>::lowest() does not exist with the Intel
619 // compiler. I'm not sure if the users in question actually enabled
620 // C++11. However, it's easy enough to work around this issue. The
621 // standard floating-point types are signed and have a sign bit, so
622 // lowest() is just -max(). For integer types, we can use min()
623 // instead.
624 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
625 ::Kokkos::ArithTraits<KeyType>::min () :
626 -::Kokkos::ArithTraits<KeyType>::max ();
627 this->init (keys, startingValue, initMinKey, initMaxKey,
628 firstContigKey, lastContigKey, true);
629}
630
631template<class KeyType, class ValueType, class DeviceType>
633FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
634 const KeyType firstContigKey,
635 const KeyType lastContigKey,
636 const ValueType startingValue) :
637 minVal_ (startingValue),
638 maxVal_ (keys.size () == 0 ?
639 startingValue :
640 static_cast<ValueType> (startingValue + keys.size () - 1)),
641 firstContigKey_ (firstContigKey),
642 lastContigKey_ (lastContigKey),
643 checkedForDuplicateKeys_ (false)
644{
645 typedef typename keys_type::non_const_type nonconst_keys_type;
646
647 // mfh 01 May 2015: I don't trust that
648 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
649 // so I ensure this manually.
650 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
651 keys.size ());
652 using Kokkos::ViewAllocateWithoutInitializing;
653 nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
654 keys_k.extent (0));
655 // DEEP_COPY REVIEW - NOT TESTED
656 Kokkos::deep_copy (keys_d, keys_k);
657
658 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
659 // min() for a floating-point type returns the minimum _positive_
660 // normalized value. This is different than for integer types.
661 // lowest() is new in C++11 and returns the least value, always
662 // negative for signed finite types.
663 //
664 // mfh 23 May 2015: I have heard reports that
665 // std::numeric_limits<int>::lowest() does not exist with the Intel
666 // compiler. I'm not sure if the users in question actually enabled
667 // C++11. However, it's easy enough to work around this issue. The
668 // standard floating-point types are signed and have a sign bit, so
669 // lowest() is just -max(). For integer types, we can use min()
670 // instead.
671 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
672 ::Kokkos::ArithTraits<KeyType>::min () :
673 -::Kokkos::ArithTraits<KeyType>::max ();
674 this->init (keys_d, startingValue, initMinKey, initMaxKey,
675 firstContigKey, lastContigKey, true);
676}
677
678template<class KeyType, class ValueType, class DeviceType>
680FixedHashTable (const keys_type& keys,
681 const ValueType startingValue) :
682 minVal_ (startingValue),
683 maxVal_ (keys.size () == 0 ?
684 startingValue :
685 static_cast<ValueType> (startingValue + keys.size () - 1)),
686 checkedForDuplicateKeys_ (false)
687{
688 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
689 // min() for a floating-point type returns the minimum _positive_
690 // normalized value. This is different than for integer types.
691 // lowest() is new in C++11 and returns the least value, always
692 // negative for signed finite types.
693 //
694 // mfh 23 May 2015: I have heard reports that
695 // std::numeric_limits<int>::lowest() does not exist with the Intel
696 // compiler. I'm not sure if the users in question actually enabled
697 // C++11. However, it's easy enough to work around this issue. The
698 // standard floating-point types are signed and have a sign bit, so
699 // lowest() is just -max(). For integer types, we can use min()
700 // instead.
701 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
702 ::Kokkos::ArithTraits<KeyType>::min () :
703 -::Kokkos::ArithTraits<KeyType>::max ();
704 this->init (keys, startingValue, initMinKey, initMaxKey,
705 initMinKey, initMinKey, false);
706}
707
708template<class KeyType, class ValueType, class DeviceType>
710FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
711 const Teuchos::ArrayView<const ValueType>& vals) :
712 contiguousValues_ (false),
713 checkedForDuplicateKeys_ (false)
714{
715 // mfh 01 May 2015: I don't trust that
716 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
717 // so I ensure this manually.
718 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
719 keys.size ());
720 host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
721 vals.size ());
722 const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
723 // min() for a floating-point type returns the minimum _positive_
724 // normalized value. This is different than for integer types.
725 // lowest() is new in C++11 and returns the least value, always
726 // negative for signed finite types.
727 //
728 // mfh 23 May 2015: I have heard reports that
729 // std::numeric_limits<int>::lowest() does not exist with the Intel
730 // compiler. I'm not sure if the users in question actually enabled
731 // C++11. However, it's easy enough to work around this issue. The
732 // standard floating-point types are signed and have a sign bit, so
733 // lowest() is just -max(). For integer types, we can use min()
734 // instead.
735 const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
736 ::Kokkos::ArithTraits<KeyType>::min () :
737 -::Kokkos::ArithTraits<KeyType>::max ();
738 this->init (keys_k, vals_k, initMinKey, initMaxKey);
739}
740
741template<class KeyType, class ValueType, class DeviceType>
742void
744init (const keys_type& keys,
745 ValueType startingValue,
746 KeyType initMinKey,
747 KeyType initMaxKey,
748 KeyType firstContigKey,
749 KeyType lastContigKey,
750 const bool computeInitContigKeys)
751{
752 using Kokkos::subview;
753 using Kokkos::ViewAllocateWithoutInitializing;
754 using Teuchos::TypeNameTraits;
755 typedef typename std::decay<decltype (keys.extent (0)) >::type size_type;
756 Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(7-arg)");
757 const char prefix[] = "Tpetra::Details::FixedHashTable: ";
758
759 const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
760 {
761 const offset_type theMaxVal = ::Kokkos::ArithTraits<offset_type>::max ();
762 const size_type maxValST = static_cast<size_type> (theMaxVal);
763 TEUCHOS_TEST_FOR_EXCEPTION
764 (keys.extent (0) > maxValST, std::invalid_argument, prefix << "The "
765 "number of keys " << keys.extent (0) << " does not fit in "
766 "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
767 "max value is " << theMaxVal << ". This means that it is not possible to "
768 "use this constructor.");
769 }
770 TEUCHOS_TEST_FOR_EXCEPTION
771 (static_cast<unsigned long long> (numKeys) >
772 static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ()),
773 std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
774 "keys " << numKeys << " is greater than the maximum representable "
775 "ValueType value " << ::Kokkos::ArithTraits<ValueType>::max () << ". "
776 "This means that it is not possible to use this constructor.");
777 TEUCHOS_TEST_FOR_EXCEPTION
778 (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
779 "This class currently only works when the number of keys is <= INT_MAX = "
780 << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
781 "developers.");
782
783 const bool buildInParallel =
784 FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
785 const bool debug = ::Tpetra::Details::Behavior::debug ();
786
787 // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
788 // could change that by setting up ptr and val as Kokkos::DualView
789 // instances. If we do that, since we are filling on host for now,
790 // we want to make sure that we only zero-fill ptr on host
791 // initially, and that we don't fill val at all. Once we finish
792 // Kokkos-izing all the set-up kernels, we won't need DualView for
793 // either ptr or val.
794
795 if (computeInitContigKeys) {
796 // Find the first and last initial contiguous keys. If we find a
797 // long sequence of initial contiguous keys, we can save space by
798 // not storing them explicitly as pairs in the hash table.
799 //
800 // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
801 // ("min index such that the difference between the current key and
802 // the next != 1"), which takes multiple passes over the data. We
803 // could fuse it with CountBuckets (only update counts on 'final'
804 // pass). However, we're really just moving this sequential search
805 // out of Map's constructor here, so there is no loss in doing it
806 // sequentially for now. Later, we can work on parallelization.
807 if (numKeys > 0) {
808 // FIXME: make it a parallel kernel with no host copy
809 auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
810 keys);
811 firstContigKey_ = keys_h[0];
812 // Start with one plus, then decrement at the end. That lets us do
813 // only one addition per loop iteration, rather than two (if we test
814 // against lastContigKey + 1 and then increment lastContigKey).
815 lastContigKey_ = firstContigKey_ + 1;
816
817 // We will only store keys in the table that are not part of the
818 // initial contiguous sequence. It's possible for the initial
819 // contiguous sequence to be trivial, which for a nonzero number of
820 // keys means that the "sequence" has length 1.
821 for (offset_type k = 1; k < numKeys; ++k) {
822 if (lastContigKey_ != keys_h[k]) {
823 break;
824 }
825 ++lastContigKey_;
826 }
827 --lastContigKey_;
828 }
829 }
830 else {
831 firstContigKey_ = firstContigKey;
832 lastContigKey_ = lastContigKey;
833 }
834
835 offset_type startIndex;
836 if (numKeys > 0) {
837 initMinKey = std::min (initMinKey, firstContigKey_);
838 initMaxKey = std::max (initMaxKey, lastContigKey_);
839 startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
840 } else {
841 startIndex = 0;
842 }
843
844 const offset_type theNumKeys = numKeys - startIndex;
845 const offset_type size = hash_type::getRecommendedSize (theNumKeys);
846#ifdef HAVE_TPETRA_DEBUG
847 TEUCHOS_TEST_FOR_EXCEPTION(
848 size == 0 && numKeys != 0, std::logic_error,
849 "Tpetra::Details::FixedHashTable constructor: "
850 "getRecommendedSize(" << numKeys << ") returned zero, "
851 "even though the number of keys " << numKeys << " is nonzero. "
852 "Please report this bug to the Tpetra developers.");
853#endif // HAVE_TPETRA_DEBUG
854 keys_type theKeys =
855 subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
856
857 // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
858 // fence here, but we do before other allocations.
859
860 // The array of counts must be separate from the array of offsets,
861 // in order for parallel_scan to work correctly.
862 typedef typename ptr_type::non_const_type counts_type;
863 counts_type counts ("Tpetra::FixedHashTable::counts", size);
864
865 //
866 // Count the number of "buckets" per offsets array (ptr) entry.
867 //
868
869 // Will only create the mirror for buildInParallel false - but then use it in two places
870 typename keys_type::HostMirror theKeysHost;
871
872 // The Kokkos kernel uses atomic update instructions to count the
873 // number of "buckets" per offsets array (ptr) entry. Atomic
874 // updates incur overhead, even in the sequential case. The Kokkos
875 // kernel is still correct in that case, but I would rather not
876 // incur overhead then.
877 if (buildInParallel) {
878 FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
879 using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
880 const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
881 if (debug) {
882 using key_type = typename keys_type::non_const_value_type;
883 Kokkos::pair<int, key_type> err;
884 Kokkos::parallel_reduce (kernelLabel, range_type (0, theNumKeys),
885 functor, err);
886 TEUCHOS_TEST_FOR_EXCEPTION
887 (err.first != 0, std::logic_error, "Tpetra::Details::FixedHashTable "
888 "constructor: CountBuckets found a key " << err.second << " that "
889 "results in an out-of-bounds hash value.");
890 }
891 else {
892 Kokkos::parallel_for (kernelLabel, range_type (0, theNumKeys), functor);
893 }
894 }
895 else {
896 Kokkos::HostSpace hostMemSpace;
897 theKeysHost = Kokkos::create_mirror_view(theKeys);
898 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
899 Kokkos::deep_copy(execution_space(), theKeysHost, theKeys);
900 auto countsHost = Kokkos::create_mirror_view (hostMemSpace, counts);
901
902 for (offset_type k = 0; k < theNumKeys; ++k) {
903 using key_type = typename keys_type::non_const_value_type;
904 const key_type key = theKeysHost[k];
905
906 using hash_value_type = typename hash_type::result_type;
907 const hash_value_type hashVal = hash_type::hashFunc (key, size);
908 TEUCHOS_TEST_FOR_EXCEPTION
909 (hashVal < hash_value_type (0) ||
910 hashVal >= hash_value_type (countsHost.extent (0)),
911 std::logic_error, "Tpetra::Details::FixedHashTable "
912 "constructor: Sequential CountBuckets found a key " << key
913 << " that results in an out-of-bounds hash value.");
914
915 ++countsHost[hashVal];
916 }
917 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
918 Kokkos::deep_copy (execution_space(), counts, countsHost);
919 }
920
921 // KJ: This fence is not required for the 2-argument deep_copy which calls
922 // fence, but will be required if switched to the 3-argumemt deep_copy which
923 // passes a space. The 3-argument form does not fence.
924 execution_space().fence ();
925
926 // Kokkos::View fills with zeros by default.
927 typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size+1);
928
929 // Compute row offsets via prefix sum:
930 //
931 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
932 //
933 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
934 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
935 // formula is ptr[i+1] += ptr[i].
936 //
937 // parallel_scan does not incur overhead with Kokkos::Serial, but
938 // with actual parallel execution spaces, it does require multiple
939 // passes over the data. Thus, it still makes sense to have a
940 // sequential fall-back.
941
943 if (buildInParallel) {
944 computeOffsetsFromCounts (ptr, counts);
945 }
946
947 if (! buildInParallel || debug) {
948 Kokkos::HostSpace hostMemSpace;
949 auto counts_h = Kokkos::create_mirror_view_and_copy (hostMemSpace, counts);
950 auto ptr_h = Kokkos::create_mirror_view (hostMemSpace, ptr);
951
952#ifdef KOKKOS_ENABLE_SERIAL
953 Kokkos::Serial hostExecSpace;
954#else
955 Kokkos::DefaultHostExecutionSpace hostExecSpace;
956#endif // KOKKOS_ENABLE_SERIAL
957
958 computeOffsetsFromCounts (hostExecSpace, ptr_h, counts_h);
959 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
960 Kokkos::deep_copy (execution_space(), ptr, ptr_h);
961
962 if (debug) {
963 bool bad = false;
964 for (offset_type i = 0; i < size; ++i) {
965 if (ptr_h[i+1] != ptr_h[i] + counts_h[i]) {
966 bad = true;
967 }
968 }
969 TEUCHOS_TEST_FOR_EXCEPTION
970 (bad, std::logic_error, "Tpetra::Details::FixedHashTable "
971 "constructor: computeOffsetsFromCounts gave an incorrect "
972 "result.");
973 }
974 }
975
976 // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
977 // This fence is necessary as we need to make sure that the offset view
978 // completes before the view is used in the next functor.
979 execution_space().fence ();
980
981 // Allocate the array of (key,value) pairs. Don't fill it with
982 // zeros, because we will fill it with actual data below.
983 typedef typename val_type::non_const_type nonconst_val_type;
984 nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
985 theNumKeys);
986
987 // Fill in the hash table's "values" (the (key,value) pairs).
988 typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
989 typename ptr_type::non_const_type> functor_type;
990 typename functor_type::value_type result (initMinKey, initMaxKey);
991
992 const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
993 if (buildInParallel) {
994 functor_type functor (val, counts, ptr, theKeys, newStartingValue,
995 initMinKey, initMaxKey);
996 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
997 Kokkos::parallel_reduce ("Tpetra::Details::FixedHashTable::FillPairs", range_type (0, theNumKeys), functor, result);
998 }
999 else {
1000 Kokkos::HostSpace hostMemSpace;
1001 auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1002 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1003 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1004 for (offset_type k = 0; k < theNumKeys; ++k) {
1005 typedef typename hash_type::result_type hash_value_type;
1006 const KeyType key = theKeysHost[k];
1007 if (key > result.maxKey_) {
1008 result.maxKey_ = key;
1009 }
1010 if (key < result.minKey_) {
1011 result.minKey_ = key;
1012 }
1013 const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1014 const hash_value_type hashVal = hash_type::hashFunc (key, size);
1015
1016 // Return the old count; decrement afterwards.
1017 const offset_type count = counts_h[hashVal];
1018 --counts_h[hashVal];
1019 if (count == 0) {
1020 result.success_ = false; // FAILURE!
1021 break;
1022 }
1023 else {
1024 const offset_type curPos = ptr_h[hashVal+1] - count;
1025 val_h[curPos].first = key;
1026 val_h[curPos].second = theVal;
1027 }
1028 }
1029 Kokkos::deep_copy(counts, counts_h); // restore
1030 Kokkos::deep_copy(val, val_h); // restore
1031 }
1032
1033 // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1034 // reports of exceptions being thrown in Albany.
1035 //
1036 // TEUCHOS_TEST_FOR_EXCEPTION
1037 // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1038 // "init: Filling the hash table failed! Please report this bug to the "
1039 // "Tpetra developers.");
1040 (void) result; // prevent build warning (set but unused variable)
1041
1042 // "Commit" the computed arrays and other computed quantities.
1043 ptr_ = ptr;
1044 val_ = val;
1045 minKey_ = result.minKey_;
1046 maxKey_ = result.maxKey_;
1047 // We've already set firstContigKey_ and lastContigKey_ above.
1048}
1049
1050
1051template<class KeyType, class ValueType, class DeviceType>
1052void
1054init (const host_input_keys_type& keys,
1055 const host_input_vals_type& vals,
1056 KeyType initMinKey,
1057 KeyType initMaxKey)
1058{
1059 Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(4-arg)");
1060 const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1061 TEUCHOS_TEST_FOR_EXCEPTION
1062 (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ()),
1063 std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1064 "keys " << numKeys << " is greater than the maximum representable "
1065 "ValueType value " << ::Kokkos::ArithTraits<ValueType>::max () << ".");
1066 TEUCHOS_TEST_FOR_EXCEPTION
1067 (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1068 "Details::FixedHashTable: This class currently only works when the number "
1069 "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1070 ", please talk to the Tpetra developers.");
1071
1072 // There's no need to find the first and last initial contiguous
1073 // keys in this case, because if we reach this init() function, then
1074 // hasContiguousValues() is false and so get() doesn't use the
1075 // initial contiguous sequence of keys.
1076
1077 const offset_type size = hash_type::getRecommendedSize (numKeys);
1078#ifdef HAVE_TPETRA_DEBUG
1079 TEUCHOS_TEST_FOR_EXCEPTION(
1080 size == 0 && numKeys != 0, std::logic_error,
1081 "Tpetra::Details::FixedHashTable constructor: "
1082 "getRecommendedSize(" << numKeys << ") returned zero, "
1083 "even though the number of keys " << numKeys << " is nonzero. "
1084 "Please report this bug to the Tpetra developers.");
1085#endif // HAVE_TPETRA_DEBUG
1086
1087 // FIXME: Investigate a couple options:
1088 // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1089 // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1090 // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1091 // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1092 // been "Kokkos-ized".
1093 Kokkos::HostSpace hostMemSpace;
1094 typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size + 1);
1095 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1096
1097 // Allocate the array of key,value pairs. Don't waste time filling
1098 // it with zeros, because we will fill it with actual data below.
1099 using Kokkos::ViewAllocateWithoutInitializing;
1100 typedef typename val_type::non_const_type nonconst_val_type;
1101 nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1102 numKeys);
1103 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1104
1105 // Compute number of entries in each hash table position.
1106 for (offset_type k = 0; k < numKeys; ++k) {
1107 const typename hash_type::result_type hashVal =
1108 hash_type::hashFunc (keys[k], size);
1109 // Shift over one, so that counts[j] = ptr[j+1]. See below.
1110 ++ptr_h[hashVal+1];
1111 }
1112
1113 // Compute row offsets via prefix sum:
1114 //
1115 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1116 //
1117 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1118 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1119 // formula is ptr[i+1] += ptr[i].
1120 for (offset_type i = 0; i < size; ++i) {
1121 ptr_h[i+1] += ptr_h[i];
1122 }
1123 //ptr[0] = 0; // We've already done this when initializing ptr above.
1124
1125 // curRowStart[i] is the offset of the next element in row i.
1126 typename ptr_type::non_const_type::HostMirror curRowStart ("Tpetra::FixedHashTable::curRowStart", size);
1127
1128 // Fill in the hash table.
1129 FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1130 for (offset_type k = 0; k < numKeys; ++k) {
1131 typedef typename hash_type::result_type hash_value_type;
1132 const KeyType key = keys[k];
1133 if (key > result.maxKey_) {
1134 result.maxKey_ = key;
1135 }
1136 if (key < result.minKey_) {
1137 result.minKey_ = key;
1138 }
1139 const ValueType theVal = vals[k];
1140 if (theVal > maxVal_) {
1141 maxVal_ = theVal;
1142 }
1143 if (theVal < minVal_) {
1144 minVal_ = theVal;
1145 }
1146 const hash_value_type hashVal = hash_type::hashFunc (key, size);
1147
1148 const offset_type offset = curRowStart[hashVal];
1149 const offset_type curPos = ptr_h[hashVal] + offset;
1150 if (curPos >= ptr_h[hashVal+1]) {
1151 result.success_ = false; // FAILURE!
1152 }
1153 else {
1154 val_h[curPos].first = key;
1155 val_h[curPos].second = theVal;
1156 ++curRowStart[hashVal];
1157 }
1158 }
1159
1160 TEUCHOS_TEST_FOR_EXCEPTION
1161 (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1162 "init: Filling the hash table failed! Please report this bug to the "
1163 "Tpetra developers.");
1164
1165 // "Commit" the computed arrays and other computed quantities.
1166 Kokkos::deep_copy(ptr, ptr_h);
1167 Kokkos::deep_copy(val, val_h);
1168
1169 ptr_ = ptr;
1170 val_ = val;
1171 minKey_ = result.minKey_;
1172 maxKey_ = result.maxKey_;
1173 // We've already assigned to minVal_ and maxVal_ above.
1174}
1175
1176template <class KeyType, class ValueType, class DeviceType>
1177bool
1180{
1181 if (! checkedForDuplicateKeys_) {
1182 hasDuplicateKeys_ = checkForDuplicateKeys ();
1183 checkedForDuplicateKeys_ = true;
1184 }
1185 return hasDuplicateKeys_;
1186}
1187
1188template <class KeyType, class ValueType, class DeviceType>
1189bool
1191checkForDuplicateKeys () const
1192{
1193 const offset_type size = this->getSize ();
1194 // It's allowed for the hash table to have a positive number of
1195 // buckets (getSize()), but still store no entries (numPairs()).
1196 // Both cases trivially entail no duplicates.
1197 if (size == 0 || this->numPairs () == 0) {
1198 return false;
1199 }
1200 else {
1201 typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1202 functor_type functor (val_, ptr_);
1203 int hasDupKeys = 0;
1204 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1205 Kokkos::parallel_reduce ("Tpetra::Details::FixedHashTable::CheckForDuplicateKeys", range_type (0, size), functor, hasDupKeys);
1206 return hasDupKeys > 0;
1207 }
1208}
1209
1210template <class KeyType, class ValueType, class DeviceType>
1211std::string
1213description () const
1214{
1215 std::ostringstream oss;
1216 oss << "FixedHashTable<"
1217 << Teuchos::TypeNameTraits<KeyType>::name () << ","
1218 << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1219 << "{ numKeys: " << val_.extent (0)
1220 << ", tableSize: " << this->getSize () << " }";
1221 return oss.str();
1222}
1223
1224template <class KeyType, class ValueType, class DeviceType>
1225void
1227describe (Teuchos::FancyOStream& out,
1228 const Teuchos::EVerbosityLevel verbLevel) const
1229{
1230 using std::endl;
1231 using std::setw;
1232 using Teuchos::OSTab;
1233 using Teuchos::rcpFromRef;
1234 using Teuchos::TypeNameTraits;
1235 using Teuchos::VERB_DEFAULT;
1236 using Teuchos::VERB_NONE;
1237 using Teuchos::VERB_LOW;
1238 using Teuchos::VERB_EXTREME;
1239
1240 // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1241 // access to ptr_ and val_ from the host.
1242
1243 Teuchos::EVerbosityLevel vl = verbLevel;
1244 if (vl == VERB_DEFAULT) vl = VERB_LOW;
1245
1246 if (vl == VERB_NONE) {
1247 // do nothing
1248 }
1249 else if (vl == VERB_LOW) {
1250 out << this->description() << endl;
1251 }
1252 else { // MEDIUM, HIGH or EXTREME
1253 out << "FixedHashTable:" << endl;
1254 {
1255 OSTab tab1 (rcpFromRef (out));
1256
1257 // const std::string label = this->getObjectLabel ();
1258 // if (label != "") {
1259 // out << "label: " << label << endl;
1260 // }
1261 out << "Template parameters:" << endl;
1262 {
1263 OSTab tab2 (rcpFromRef (out));
1264 out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1265 << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1266 }
1267
1268 const offset_type tableSize = this->getSize ();
1269 const offset_type numKeys = val_.extent (0);
1270
1271 out << "Table parameters:" << endl;
1272 {
1273 OSTab tab2 (rcpFromRef (out));
1274 out << "numKeys: " << numKeys << endl
1275 << "tableSize: " << tableSize << endl;
1276 }
1277
1278 if (vl >= VERB_EXTREME) {
1279 out << "Contents: ";
1280 if (tableSize == 0 || numKeys == 0) {
1281 out << "[]" << endl;
1282 } else {
1283 out << "[ " << endl;
1284 {
1285 OSTab tab2 (rcpFromRef (out));
1286 for (offset_type i = 0; i < tableSize; ++i) {
1287 OSTab tab3 (rcpFromRef (out));
1288 out << "[";
1289 for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1290 out << "(" << val_[k].first << "," << val_[k].second << ")";
1291 if (k + 1 < ptr_[i+1]) {
1292 out << ", ";
1293 }
1294 }
1295 out << "]" << endl;
1296 } // for each table position i
1297 }
1298 out << "]" << endl;
1299 } // The table contains entries
1300 } // vl >= VERB_EXTREME
1301 }
1302 out << endl;
1303 } // if vl > VERB_LOW
1304}
1305
1306} // namespace Details
1307} // namespace Tpetra
1308
1309// Macro that explicitly instantiates FixedHashTable for the given local
1310// ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1311// template parameters occur in the opposite order of most Tpetra
1312// classes. This is because FixedHashTable performs global-to-local
1313// lookup, and the convention in templated C++ lookup tables (such as
1314// std::map) is <KeyType, ValueType>.
1315//
1316// This macro must be explanded within the Tpetra::Details namespace.
1317#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1318 template class FixedHashTable< GO , LO >;
1319
1320// Macro that explicitly instantiates FixedHashTable for the given
1321// local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1322// types. Note that FixedHashTable's first two template parameters
1323// occur in the opposite order of most Tpetra classes. This is
1324// because FixedHashTable performs global-to-local lookup, and the
1325// convention in templated C++ lookup tables (such as std::map) is
1326// <KeyType, ValueType>.
1327//
1328// This macro must be explanded within the Tpetra::Details namespace.
1329#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1330 template class FixedHashTable< GO , LO , DEVICE >;
1331
1332#endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
static bool debug()
Whether Tpetra is in debug mode.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
KOKKOS_INLINE_FUNCTION void join(value_type &dst, const value_type &src) const
Combine two intermediate reduction results.
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel for functor for counting "buckets" in the FixedHashTable.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
std::string description() const
Implementation of Teuchos::Describable interface.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
KOKKOS_DEFAULTED_FUNCTION FixedHashTable()=default
Default constructor; makes an empty table.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
Nonmember function that computes a residual Computes R = B - A * X.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Reduction result for FillPairs functor below.
bool success_
Whether fill succeeded (it can only fail on a bug).
The hash function for FixedHashTable.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &, const offset_type &)