Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_ExecPolicy.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_EXECPOLICY_HPP
23#define KOKKOS_EXECPOLICY_HPP
24
25#include <Kokkos_Core_fwd.hpp>
26#include <impl/Kokkos_Traits.hpp>
27#include <impl/Kokkos_Error.hpp>
28#include <impl/Kokkos_AnalyzePolicy.hpp>
29#include <Kokkos_Concepts.hpp>
30#include <Kokkos_TypeInfo.hpp>
31#ifndef KOKKOS_ENABLE_IMPL_TYPEINFO
32#include <typeinfo>
33#endif
34#include <limits>
35
36//----------------------------------------------------------------------------
37
38namespace Kokkos {
39
// Empty tag types, one per parallel pattern name (for / scan / reduce).
// NOTE(review): presumably used to select pattern-specific behaviour at
// compile time; their consumers are not visible in this part of the file.
40struct ParallelForTag {};
41struct ParallelScanTag {};
42struct ParallelReduceTag {};
43
/// Strongly-typed wrapper around an `int` chunk size.
///
/// Construction from `int` is explicit. When KOKKOS_ENABLE_DEPRECATED_CODE_4
/// is defined, an additional deprecated, non-explicit constructor is provided
/// so that implicit conversions from `int` keep compiling (with a warning).
struct ChunkSize {
  /// The wrapped chunk size.
  int value;

  explicit ChunkSize(int value_) : value{value_} {}

#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
  // Templated so that the explicit constructor above is preferred whenever
  // both are viable (a non-template wins over a template on a tie).
  template <typename T = void>
  KOKKOS_DEPRECATED_WITH_COMMENT("ChunkSize should be constructed explicitly.")
  ChunkSize(int value_) : value{value_} {}
#endif
};
53
75template <class... Properties>
76class RangePolicy : public Impl::PolicyTraits<Properties...> {
77 public:
78 using traits = Impl::PolicyTraits<Properties...>;
79
80 private:
81 typename traits::execution_space m_space;
82 typename traits::index_type m_begin;
83 typename traits::index_type m_end;
84 typename traits::index_type m_granularity;
85 typename traits::index_type m_granularity_mask;
86
87 template <class... OtherProperties>
88 friend class RangePolicy;
89
90 public:
92 using execution_policy = RangePolicy<Properties...>;
93 using member_type = typename traits::index_type;
94 using index_type = typename traits::index_type;
95
96 KOKKOS_INLINE_FUNCTION const typename traits::execution_space& space() const {
97 return m_space;
98 }
99 KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
100 KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
101
102 // TODO: find a better workaround for Clangs weird instantiation order
103 // This thing is here because of an instantiation error, where the RangePolicy
104 // is inserted into FunctorValue Traits, which tries decltype on the operator.
105 // It tries to do this even though the first argument of parallel for clearly
106 // doesn't match.
107 void operator()(const int&) const {}
108
109 template <class... OtherProperties>
110 RangePolicy(const RangePolicy<OtherProperties...>& p)
111 : traits(p), // base class may contain data such as desired occupancy
112 m_space(p.m_space),
113 m_begin(p.m_begin),
114 m_end(p.m_end),
115 m_granularity(p.m_granularity),
116 m_granularity_mask(p.m_granularity_mask) {}
117
118 inline RangePolicy()
119 : m_space(),
120 m_begin(0),
121 m_end(0),
122 m_granularity(0),
123 m_granularity_mask(0) {}
124
126 template <typename IndexType1, typename IndexType2,
127 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
128 std::is_convertible_v<IndexType2, member_type>),
129 bool> = false>
130 inline RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
131 : RangePolicy(typename traits::execution_space(), work_begin, work_end) {}
132
134 template <typename IndexType1, typename IndexType2,
135 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
136 std::is_convertible_v<IndexType2, member_type>),
137 bool> = false>
138 inline RangePolicy(const typename traits::execution_space& work_space,
139 const IndexType1 work_begin, const IndexType2 work_end)
140 : m_space(work_space),
141 m_begin(work_begin),
142 m_end(work_end),
143 m_granularity(0),
144 m_granularity_mask(0) {
145 check_conversion_safety(work_begin);
146 check_conversion_safety(work_end);
147 check_bounds_validity();
148 set_auto_chunk_size();
149 }
150
151 template <typename IndexType1, typename IndexType2,
152 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
153 std::is_convertible_v<IndexType2, member_type>),
154 bool> = false>
155 RangePolicy(const typename traits::execution_space& work_space,
156 const IndexType1 work_begin, const IndexType2 work_end,
157 const ChunkSize chunk_size)
158 : m_space(work_space),
159 m_begin(work_begin),
160 m_end(work_end),
161 m_granularity(0),
162 m_granularity_mask(0) {
163 check_conversion_safety(work_begin);
164 check_conversion_safety(work_end);
165 check_bounds_validity();
167 }
168
170 template <typename IndexType1, typename IndexType2, typename... Args,
171 std::enable_if_t<(std::is_convertible_v<IndexType1, member_type> &&
172 std::is_convertible_v<IndexType2, member_type>),
173 bool> = false>
174 RangePolicy(const IndexType1 work_begin, const IndexType2 work_end,
175 const ChunkSize chunk_size)
176 : RangePolicy(typename traits::execution_space(), work_begin, work_end,
177 chunk_size) {}
178
179 public:
180#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
181 KOKKOS_DEPRECATED_WITH_COMMENT("Use set_chunk_size instead")
182 inline void set(ChunkSize chunksize) {
183 m_granularity = chunksize.value;
184 m_granularity_mask = m_granularity - 1;
185 }
186#endif
187
188 public:
190 inline member_type chunk_size() const { return m_granularity; }
191
193 inline RangePolicy& set_chunk_size(int chunk_size) {
194 m_granularity = chunk_size;
195 m_granularity_mask = m_granularity - 1;
196 return *this;
197 }
198
199 private:
201 inline void set_auto_chunk_size() {
202#ifdef KOKKOS_ENABLE_SYCL
203 if (std::is_same_v<typename traits::execution_space, Kokkos::SYCL>) {
204 // chunk_size <=1 lets the compiler choose the workgroup size when
205 // launching kernels
206 m_granularity = 1;
207 m_granularity_mask = 0;
208 return;
209 }
210#endif
211 auto concurrency = static_cast<int64_t>(m_space.concurrency());
212 if (concurrency == 0) concurrency = 1;
213
214 if (m_granularity > 0) {
215 if (!Impl::is_integral_power_of_two(m_granularity))
216 Kokkos::abort("RangePolicy blocking granularity must be power of two");
217 }
218
219 int64_t new_chunk_size = 1;
220 while (new_chunk_size * 100 * concurrency <
221 static_cast<int64_t>(m_end - m_begin))
222 new_chunk_size *= 2;
223 if (new_chunk_size < 128) {
224 new_chunk_size = 1;
225 while ((new_chunk_size * 40 * concurrency <
226 static_cast<int64_t>(m_end - m_begin)) &&
227 (new_chunk_size < 128))
228 new_chunk_size *= 2;
229 }
230 m_granularity = new_chunk_size;
231 m_granularity_mask = m_granularity - 1;
232 }
233
234 void check_bounds_validity() {
235 if (m_end < m_begin) {
236 std::string msg = "Kokkos::RangePolicy bounds error: The lower bound (" +
237 std::to_string(m_begin) +
238 ") is greater than the upper bound (" +
239 std::to_string(m_end) + ").\n";
240#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
241 Kokkos::abort(msg.c_str());
242#endif
243 m_begin = 0;
244 m_end = 0;
245#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
246 Kokkos::Impl::log_warning(msg);
247#endif
248 }
249 }
250
251 // To be replaced with std::in_range (c++20)
252 template <typename IndexType>
253 static void check_conversion_safety([[maybe_unused]] const IndexType bound) {
254 // Checking that the round-trip conversion preserves input index value
255 if constexpr (std::is_convertible_v<member_type, IndexType>) {
256#if !defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) || \
257 defined(KOKKOS_ENABLE_DEPRECATION_WARNINGS)
258
259 std::string msg =
260 "Kokkos::RangePolicy bound type error: an unsafe implicit conversion "
261 "is performed on a bound (" +
262 std::to_string(bound) +
263 "), which may "
264 "not preserve its original value.\n";
265 bool warn = false;
266
267 if constexpr (std::is_arithmetic_v<member_type> &&
268 (std::is_signed_v<IndexType> !=
269 std::is_signed_v<member_type>)) {
270 // check signed to unsigned
271 if constexpr (std::is_signed_v<IndexType>)
272 warn |= (bound < static_cast<IndexType>(
273 std::numeric_limits<member_type>::min()));
274
275 // check unsigned to signed
276 if constexpr (std::is_signed_v<member_type>)
277 warn |= (bound > static_cast<IndexType>(
278 std::numeric_limits<member_type>::max()));
279 }
280
281 // check narrowing
282 warn |=
283 (static_cast<IndexType>(static_cast<member_type>(bound)) != bound);
284
285 if (warn) {
286#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4
287 Kokkos::abort(msg.c_str());
288#endif
289
290#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
291 Kokkos::Impl::log_warning(msg);
292#endif
293 }
294#endif
295 }
296 }
297
298 public:
303 struct WorkRange {
304 using work_tag = typename RangePolicy<Properties...>::work_tag;
305 using member_type = typename RangePolicy<Properties...>::member_type;
306
307 KOKKOS_INLINE_FUNCTION member_type begin() const { return m_begin; }
308 KOKKOS_INLINE_FUNCTION member_type end() const { return m_end; }
309
314 KOKKOS_INLINE_FUNCTION
315 WorkRange(const RangePolicy& range, const int part_rank,
316 const int part_size)
317 : m_begin(0), m_end(0) {
318 if (part_size) {
319 // Split evenly among partitions, then round up to the granularity.
320 const member_type work_part =
321 ((((range.end() - range.begin()) + (part_size - 1)) / part_size) +
322 range.m_granularity_mask) &
323 ~member_type(range.m_granularity_mask);
324
325 m_begin = range.begin() + work_part * part_rank;
326 m_end = m_begin + work_part;
327
328 if (range.end() < m_begin) m_begin = range.end();
329 if (range.end() < m_end) m_end = range.end();
330 }
331 }
332
333 private:
334 member_type m_begin;
335 member_type m_end;
336 WorkRange();
337 WorkRange& operator=(const WorkRange&);
338 };
339};
340
// Class template argument deduction guides for RangePolicy.
// - No execution space argument, or a DefaultExecutionSpace argument:
//   deduce RangePolicy<>.
// - Any other type ES for which is_execution_space_v<ES> is true:
//   deduce RangePolicy<ES>.
// Guides exist for (begin, end) and (begin, end, ChunkSize) argument lists.
341RangePolicy() -> RangePolicy<>;
342
343RangePolicy(int64_t, int64_t) -> RangePolicy<>;
344RangePolicy(int64_t, int64_t, ChunkSize const&) -> RangePolicy<>;
345
346RangePolicy(DefaultExecutionSpace const&, int64_t, int64_t) -> RangePolicy<>;
347RangePolicy(DefaultExecutionSpace const&, int64_t, int64_t, ChunkSize const&)
348 -> RangePolicy<>;
349
// The defaulted second template parameter removes these guides from
// consideration when ES is not an execution space.
350template <typename ES, typename = std::enable_if_t<is_execution_space_v<ES>>>
351RangePolicy(ES const&, int64_t, int64_t) -> RangePolicy<ES>;
352
353template <typename ES, typename = std::enable_if_t<is_execution_space_v<ES>>>
354RangePolicy(ES const&, int64_t, int64_t, ChunkSize const&) -> RangePolicy<ES>;
355
356} // namespace Kokkos
357
358//----------------------------------------------------------------------------
359//----------------------------------------------------------------------------
360
361namespace Kokkos {
362
363namespace Impl {
364
// Interface declaration of the internal team policy that the public
// TeamPolicy derives from. Only declarations appear here; no member is
// defined in this part of the file.
// NOTE(review): presumably each backend supplies its own specialisation for
// its ExecSpace -- confirm against the backend headers.
365template <class ExecSpace, class... Properties>
366class TeamPolicyInternal : public Impl::PolicyTraits<Properties...> {
367 private:
368 using traits = Impl::PolicyTraits<Properties...>;
369
370 public:
371 using index_type = typename traits::index_type;
372
373 //----------------------------------------
// Team-size queries taking the functor to be dispatched.
384 template <class FunctorType>
385 static int team_size_max(const FunctorType&);
386
397 template <class FunctorType>
398 static int team_size_recommended(const FunctorType&);
399
400 template <class FunctorType>
401 static int team_size_recommended(const FunctorType&, const int&);
402
// Non-static variant taking an explicit vector length.
403 template <class FunctorType>
404 int team_size_recommended(const FunctorType& functor,
405 const int vector_length);
406
407 //----------------------------------------
// Constructors taking an execution space instance, with either an explicit
// team size or Kokkos::AUTO_t in its place.
409 TeamPolicyInternal(const typename traits::execution_space&,
410 int league_size_request, int team_size_request,
411 int vector_length_request = 1);
412
413 TeamPolicyInternal(const typename traits::execution_space&,
414 int league_size_request, const Kokkos::AUTO_t&,
415 int vector_length_request = 1);
416
// The same two constructors without an execution space argument.
419 TeamPolicyInternal(int league_size_request, int team_size_request,
420 int vector_length_request = 1);
421
422 TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t&,
423 int vector_length_request = 1);
424
425 /* TeamPolicyInternal( int league_size_request , int team_size_request );
426
427 TeamPolicyInternal( int league_size_request , const Kokkos::AUTO_t & );*/
428
// Accessors for the league and team sizes.
434 KOKKOS_INLINE_FUNCTION int league_size() const;
435
441 KOKKOS_INLINE_FUNCTION int team_size() const;
442
// NOTE(review): by their names these report whether the team size / vector
// length was requested as AUTO -- confirm in a backend implementation.
445 inline bool impl_auto_team_size() const;
448 inline bool impl_auto_vector_length() const;
449
450 static int vector_length_max();
451
452 KOKKOS_INLINE_FUNCTION int impl_vector_length() const;
453
454 inline typename traits::index_type chunk_size() const;
455
// Returns a reference to the policy; TeamPolicy::set_chunk_size statically
// asserts that the return type is internal_policy&.
456 inline TeamPolicyInternal& set_chunk_size(int chunk_size);
457
// Handle type describing one team member: rank/size queries, a team
// barrier, and team-wide reduce/scan operations.
461 struct member_type {
463 KOKKOS_INLINE_FUNCTION
464 typename traits::execution_space::scratch_memory_space team_shmem() const;
465
467 KOKKOS_INLINE_FUNCTION int league_rank() const;
468
470 KOKKOS_INLINE_FUNCTION int league_size() const;
471
473 KOKKOS_INLINE_FUNCTION int team_rank() const;
474
476 KOKKOS_INLINE_FUNCTION int team_size() const;
477
479 KOKKOS_INLINE_FUNCTION void team_barrier() const;
480
483 template <class JoinOp>
484 KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce(
485 const typename JoinOp::value_type, const JoinOp&) const;
486
492 template <typename Type>
493 KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const;
494
// Overload that additionally takes a pointer named global_accum.
504 template <typename Type>
505 KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value,
506 Type* const global_accum) const;
507 };
508};
509
// Wrapper around a size_t that marks it as a per-team quantity.
// ScratchRequest copies `value` into its `per_team` member.
// The constructor is only declared here; it is non-explicit, so a size_t
// converts implicitly.
510struct PerTeamValue {
511 size_t value;
512 PerTeamValue(size_t arg);
513};
514
// Wrapper around a size_t that marks it as a per-thread quantity.
// ScratchRequest copies `value` into its `per_thread` member.
// The constructor is only declared here; it is non-explicit, so a size_t
// converts implicitly.
515struct PerThreadValue {
516 size_t value;
517 PerThreadValue(size_t arg);
518};
519
/// Extracts a vector length from the first of a list of arguments.
///
/// `value(val, rest...)` returns `val` (as `iType`) when `iType` is an
/// integral type, and the `int` 1 otherwise; the remaining arguments are
/// ignored.
///
/// The previous implementation declared two `value` overloads constrained
/// with `std::enable_if_t` on the *class* template parameter. Because the
/// members are not templates themselves, that is not SFINAE: instantiating
/// the class for any `iType` made one of the two declarations ill-formed.
/// A single member with `if constexpr` gives the intended behaviour.
template <class iType, class... Args>
struct ExtractVectorLength {
  static inline auto value([[maybe_unused]] iType val, Args...) {
    if constexpr (std::is_integral_v<iType>) {
      return val;
    } else {
      return 1;
    }
  }
};
531
/// Returns the first argument when its type is integral, and the `int` 1
/// otherwise. Any further arguments are accepted and ignored.
///
/// The return type is `iType` for integral `iType` and `int` for everything
/// else.
template <class iType, class... Args>
inline auto extract_vector_length([[maybe_unused]] iType val, Args...) {
  if constexpr (std::is_integral_v<iType>) {
    return val;
  } else {
    return 1;
  }
}
543
544} // namespace Impl
545
// Factory functions returning the Impl::PerTeamValue / Impl::PerThreadValue
// wrappers accepted by ScratchRequest and TeamPolicy::set_scratch_size.
// Only declared here; the definitions are not in this part of the file.
546Impl::PerTeamValue PerTeam(const size_t& arg);
547Impl::PerThreadValue PerThread(const size_t& arg);
548
549struct ScratchRequest {
550 int level;
551
552 size_t per_team;
553 size_t per_thread;
554
555 inline ScratchRequest(const int& level_,
556 const Impl::PerTeamValue& team_value) {
557 level = level_;
558 per_team = team_value.value;
559 per_thread = 0;
560 }
561
562 inline ScratchRequest(const int& level_,
563 const Impl::PerThreadValue& thread_value) {
564 level = level_;
565 per_team = 0;
566 per_thread = thread_value.value;
567 }
568
569 inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value,
570 const Impl::PerThreadValue& thread_value) {
571 level = level_;
572 per_team = team_value.value;
573 per_thread = thread_value.value;
574 }
575
576 inline ScratchRequest(const int& level_,
577 const Impl::PerThreadValue& thread_value,
578 const Impl::PerTeamValue& team_value) {
579 level = level_;
580 per_team = team_value.value;
581 per_thread = thread_value.value;
582 }
583};
584
585// Causes abnormal program termination if level is not `0` or `1`
// Called by every TeamPolicy::set_scratch_size overload before forwarding to
// the internal policy. Only declared here.
586void team_policy_check_valid_storage_level_argument(int level);
587
614template <class... Properties>
615class TeamPolicy
616 : public Impl::TeamPolicyInternal<
617 typename Impl::PolicyTraits<Properties...>::execution_space,
618 Properties...> {
619 using internal_policy = Impl::TeamPolicyInternal<
620 typename Impl::PolicyTraits<Properties...>::execution_space,
621 Properties...>;
622
623 template <class... OtherProperties>
624 friend class TeamPolicy;
625
626 public:
627 using traits = Impl::PolicyTraits<Properties...>;
628
629 using execution_policy = TeamPolicy<Properties...>;
630
631 TeamPolicy() : internal_policy(0, AUTO) {}
632
634 TeamPolicy(const typename traits::execution_space& space_,
635 int league_size_request, int team_size_request,
636 int vector_length_request = 1)
637 : internal_policy(space_, league_size_request, team_size_request,
638 vector_length_request) {}
639
640 TeamPolicy(const typename traits::execution_space& space_,
641 int league_size_request, const Kokkos::AUTO_t&,
642 int vector_length_request = 1)
643 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
644 vector_length_request) {}
645
646 TeamPolicy(const typename traits::execution_space& space_,
647 int league_size_request, const Kokkos::AUTO_t&,
648 const Kokkos::AUTO_t&)
649 : internal_policy(space_, league_size_request, Kokkos::AUTO(),
650 Kokkos::AUTO()) {}
651 TeamPolicy(const typename traits::execution_space& space_,
652 int league_size_request, const int team_size_request,
653 const Kokkos::AUTO_t&)
654 : internal_policy(space_, league_size_request, team_size_request,
655 Kokkos::AUTO()) {}
658 TeamPolicy(int league_size_request, int team_size_request,
659 int vector_length_request = 1)
660 : internal_policy(league_size_request, team_size_request,
661 vector_length_request) {}
662
663 TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
664 int vector_length_request = 1)
665 : internal_policy(league_size_request, Kokkos::AUTO(),
666 vector_length_request) {}
667
668 TeamPolicy(int league_size_request, const Kokkos::AUTO_t&,
669 const Kokkos::AUTO_t&)
670 : internal_policy(league_size_request, Kokkos::AUTO(), Kokkos::AUTO()) {}
671 TeamPolicy(int league_size_request, const int team_size_request,
672 const Kokkos::AUTO_t&)
673 : internal_policy(league_size_request, team_size_request,
674 Kokkos::AUTO()) {}
675
676 template <class... OtherProperties>
677 TeamPolicy(const TeamPolicy<OtherProperties...> p) : internal_policy(p) {
678 // Cannot call converting constructor in the member initializer list because
679 // it is not a direct base.
680 internal_policy::traits::operator=(p);
681 }
682
683 private:
684 TeamPolicy(const internal_policy& p) : internal_policy(p) {}
685
686 public:
687 inline TeamPolicy& set_chunk_size(int chunk) {
688 static_assert(std::is_same<decltype(internal_policy::set_chunk_size(chunk)),
689 internal_policy&>::value,
690 "internal set_chunk_size should return a reference");
691 return static_cast<TeamPolicy&>(internal_policy::set_chunk_size(chunk));
692 }
693
694 inline TeamPolicy& set_scratch_size(const int& level,
695 const Impl::PerTeamValue& per_team) {
696 static_assert(std::is_same<decltype(internal_policy::set_scratch_size(
697 level, per_team)),
698 internal_policy&>::value,
699 "internal set_chunk_size should return a reference");
700
701 team_policy_check_valid_storage_level_argument(level);
702 return static_cast<TeamPolicy&>(
703 internal_policy::set_scratch_size(level, per_team));
704 }
705 inline TeamPolicy& set_scratch_size(const int& level,
706 const Impl::PerThreadValue& per_thread) {
707 team_policy_check_valid_storage_level_argument(level);
708 return static_cast<TeamPolicy&>(
709 internal_policy::set_scratch_size(level, per_thread));
710 }
711 inline TeamPolicy& set_scratch_size(const int& level,
712 const Impl::PerTeamValue& per_team,
713 const Impl::PerThreadValue& per_thread) {
714 team_policy_check_valid_storage_level_argument(level);
715 return static_cast<TeamPolicy&>(
716 internal_policy::set_scratch_size(level, per_team, per_thread));
717 }
718 inline TeamPolicy& set_scratch_size(const int& level,
719 const Impl::PerThreadValue& per_thread,
720 const Impl::PerTeamValue& per_team) {
721 team_policy_check_valid_storage_level_argument(level);
722 return static_cast<TeamPolicy&>(
723 internal_policy::set_scratch_size(level, per_team, per_thread));
724 }
725};
726
// Class template argument deduction guides for TeamPolicy. One guide exists
// for each constructor argument pattern: (league, team), (league, team,
// vector), and the variants where Kokkos::AUTO_t replaces the team size
// and/or the vector length.
727// Execution space not provided deduces to TeamPolicy<>
728
729TeamPolicy() -> TeamPolicy<>;
730
731TeamPolicy(int, int) -> TeamPolicy<>;
732TeamPolicy(int, int, int) -> TeamPolicy<>;
733TeamPolicy(int, Kokkos::AUTO_t const&) -> TeamPolicy<>;
734TeamPolicy(int, Kokkos::AUTO_t const&, int) -> TeamPolicy<>;
735TeamPolicy(int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&) -> TeamPolicy<>;
736TeamPolicy(int, int, Kokkos::AUTO_t const&) -> TeamPolicy<>;
737
738// DefaultExecutionSpace deduces to TeamPolicy<>
739
740TeamPolicy(DefaultExecutionSpace const&, int, int) -> TeamPolicy<>;
741TeamPolicy(DefaultExecutionSpace const&, int, int, int) -> TeamPolicy<>;
742TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&)
743 -> TeamPolicy<>;
744TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, int)
745 -> TeamPolicy<>;
746TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&,
747 Kokkos::AUTO_t const&) -> TeamPolicy<>;
748TeamPolicy(DefaultExecutionSpace const&, int, int, Kokkos::AUTO_t const&)
749 -> TeamPolicy<>;
750
751// ES != DefaultExecutionSpace deduces to TeamPolicy<ES>
752
// The defaulted second template parameter removes each of the following
// guides from consideration when ES is not an execution space.
753template <typename ES,
754 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
755TeamPolicy(ES const&, int, int) -> TeamPolicy<ES>;
756
757template <typename ES,
758 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
759TeamPolicy(ES const&, int, int, int) -> TeamPolicy<ES>;
760
761template <typename ES,
762 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
763TeamPolicy(ES const&, int, Kokkos::AUTO_t const&) -> TeamPolicy<ES>;
764
765template <typename ES,
766 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
767TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, int) -> TeamPolicy<ES>;
768
769template <typename ES,
770 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
771TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)
772 -> TeamPolicy<ES>;
773
774template <typename ES,
775 typename = std::enable_if_t<Kokkos::is_execution_space_v<ES>>>
776TeamPolicy(ES const&, int, int, Kokkos::AUTO_t const&) -> TeamPolicy<ES>;
777
778namespace Impl {
779
780template <typename iType, class TeamMemberType>
781struct TeamThreadRangeBoundariesStruct {
782 private:
783 KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
784 const iType& arg_end,
785 const iType& arg_rank,
786 const iType& arg_size) {
787 return arg_begin +
788 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
789 }
790
791 KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
792 const iType& arg_end,
793 const iType& arg_rank,
794 const iType& arg_size) {
795 const iType end_ =
796 arg_begin +
797 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
798 return end_ < arg_end ? end_ : arg_end;
799 }
800
801 public:
802 using index_type = iType;
803 const iType start;
804 const iType end;
805 enum { increment = 1 };
806 const TeamMemberType& thread;
807
808 KOKKOS_INLINE_FUNCTION
809 TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
810 const iType& arg_end)
811 : start(
812 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
813 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
814 thread(arg_thread) {}
815
816 KOKKOS_INLINE_FUNCTION
817 TeamThreadRangeBoundariesStruct(const TeamMemberType& arg_thread,
818 const iType& arg_begin, const iType& arg_end)
819 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
820 arg_thread.team_size())),
821 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
822 arg_thread.team_size())),
823 thread(arg_thread) {}
824};
825
826template <typename iType, class TeamMemberType>
827struct TeamVectorRangeBoundariesStruct {
828 private:
829 KOKKOS_INLINE_FUNCTION static iType ibegin(const iType& arg_begin,
830 const iType& arg_end,
831 const iType& arg_rank,
832 const iType& arg_size) {
833 return arg_begin +
834 ((arg_end - arg_begin + arg_size - 1) / arg_size) * arg_rank;
835 }
836
837 KOKKOS_INLINE_FUNCTION static iType iend(const iType& arg_begin,
838 const iType& arg_end,
839 const iType& arg_rank,
840 const iType& arg_size) {
841 const iType end_ =
842 arg_begin +
843 ((arg_end - arg_begin + arg_size - 1) / arg_size) * (arg_rank + 1);
844 return end_ < arg_end ? end_ : arg_end;
845 }
846
847 public:
848 using index_type = iType;
849 const iType start;
850 const iType end;
851 enum { increment = 1 };
852 const TeamMemberType& thread;
853
854 KOKKOS_INLINE_FUNCTION
855 TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
856 const iType& arg_end)
857 : start(
858 ibegin(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
859 end(iend(0, arg_end, arg_thread.team_rank(), arg_thread.team_size())),
860 thread(arg_thread) {}
861
862 KOKKOS_INLINE_FUNCTION
863 TeamVectorRangeBoundariesStruct(const TeamMemberType& arg_thread,
864 const iType& arg_begin, const iType& arg_end)
865 : start(ibegin(arg_begin, arg_end, arg_thread.team_rank(),
866 arg_thread.team_size())),
867 end(iend(arg_begin, arg_end, arg_thread.team_rank(),
868 arg_thread.team_size())),
869 thread(arg_thread) {}
870};
871
// Bounds of a thread-vector loop. Unlike the team-level boundary structs,
// the range is stored as given: no partitioning by rank happens here and the
// team member argument is not used or stored.
872template <typename iType, class TeamMemberType>
873struct ThreadVectorRangeBoundariesStruct {
874 using index_type = iType;
875 const index_type start;
876 const index_type end;
877 enum { increment = 1 };
878
// Range from 0 to `count`. The team member is taken by value and ignored.
879 KOKKOS_INLINE_FUNCTION
880 constexpr ThreadVectorRangeBoundariesStruct(const TeamMemberType,
881 const index_type& count) noexcept
882 : start(static_cast<index_type>(0)), end(count) {}
883
// Range from `arg_begin` to `arg_end`.
884 KOKKOS_INLINE_FUNCTION
885 constexpr ThreadVectorRangeBoundariesStruct(
886 const TeamMemberType, const index_type& arg_begin,
887 const index_type& arg_end) noexcept
888 : start(static_cast<index_type>(arg_begin)), end(arg_end) {}
889};
890
// Holds a reference to a team member. The referenced object must outlive
// this struct, because only the reference is stored.
// NOTE(review): presumably the argument type for "single per thread"
// dispatch -- its consumers are not visible here.
891template <class TeamMemberType>
892struct ThreadSingleStruct {
893 const TeamMemberType& team_member;
894 KOKKOS_INLINE_FUNCTION
895 ThreadSingleStruct(const TeamMemberType& team_member_)
896 : team_member(team_member_) {}
897};
898
// Holds a reference to a team member. The referenced object must outlive
// this struct, because only the reference is stored.
// NOTE(review): presumably the argument type for "single per vector lane"
// dispatch -- its consumers are not visible here.
899template <class TeamMemberType>
900struct VectorSingleStruct {
901 const TeamMemberType& team_member;
902 KOKKOS_INLINE_FUNCTION
903 VectorSingleStruct(const TeamMemberType& team_member_)
904 : team_member(team_member_) {}
905};
906
907} // namespace Impl
908
// Deleted placeholder overloads of TeamThreadRange. The trailing template
// parameter `_never_use_this_overload` does not appear in the function
// parameters, so it cannot be deduced and these overloads are never chosen
// by an ordinary call.
// NOTE(review): presumably they exist only to declare the name and document
// the signatures that backends provide -- confirm against backend headers.
916template <typename iType, class TeamMemberType, class _never_use_this_overload>
917KOKKOS_INLINE_FUNCTION_DELETED
918 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
919 TeamThreadRange(const TeamMemberType&, const iType& count) = delete;
920
// Two-bound form; the index type is the common type of the two bound types.
928template <typename iType1, typename iType2, class TeamMemberType,
929 class _never_use_this_overload>
930KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
931 std::common_type_t<iType1, iType2>, TeamMemberType>
932TeamThreadRange(const TeamMemberType&, const iType1& begin,
933 const iType2& end) = delete;
934
// Deleted placeholder overloads of TeamVectorRange. The trailing template
// parameter `_never_use_this_overload` does not appear in the function
// parameters, so it cannot be deduced and these overloads are never chosen
// by an ordinary call.
// NOTE(review): the declared return type is TeamThreadRangeBoundariesStruct
// rather than TeamVectorRangeBoundariesStruct; harmless while the overloads
// are deleted, but worth confirming whether that is intentional.
942template <typename iType, class TeamMemberType, class _never_use_this_overload>
943KOKKOS_INLINE_FUNCTION_DELETED
944 Impl::TeamThreadRangeBoundariesStruct<iType, TeamMemberType>
945 TeamVectorRange(const TeamMemberType&, const iType& count) = delete;
946
// Two-bound form; the index type is the common type of the two bound types.
954template <typename iType1, typename iType2, class TeamMemberType,
955 class _never_use_this_overload>
956KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct<
957 std::common_type_t<iType1, iType2>, TeamMemberType>
958TeamVectorRange(const TeamMemberType&, const iType1& begin,
959 const iType2& end) = delete;
960
// Deleted placeholder overloads of ThreadVectorRange. The trailing template
// parameter `_never_use_this_overload` does not appear in the function
// parameters, so it cannot be deduced and these overloads are never chosen
// by an ordinary call.
968template <typename iType, class TeamMemberType, class _never_use_this_overload>
969KOKKOS_INLINE_FUNCTION_DELETED
970 Impl::ThreadVectorRangeBoundariesStruct<iType, TeamMemberType>
971 ThreadVectorRange(const TeamMemberType&, const iType& count) = delete;
972
// Two-bound form; the index type is the common type of the two bound types.
973template <typename iType1, typename iType2, class TeamMemberType,
974 class _never_use_this_overload>
975KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct<
976 std::common_type_t<iType1, iType2>, TeamMemberType>
977ThreadVectorRange(const TeamMemberType&, const iType1& arg_begin,
978 const iType2& arg_end) = delete;
979
// Forward declarations and small helper types for the team-level MD range
// policies declared after this namespace block. Nothing is defined here
// apart from the enums and NoReductionTag.
980namespace Impl {
981
// Two-valued flags used as template arguments / static members by the MD
// range structs. In each enum the first enumerator is the "false" value.
982enum class TeamMDRangeLastNestLevel : bool { NotLastNestLevel, LastNestLevel };
983enum class TeamMDRangeParThread : bool { NotParThread, ParThread };
984enum class TeamMDRangeParVector : bool { NotParVector, ParVector };
985enum class TeamMDRangeThreadAndVector : bool { NotBoth, Both };
986
987template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
988struct HostBasedNestLevel;
989
990template <typename Rank, TeamMDRangeThreadAndVector ThreadAndVector>
991struct AcceleratorBasedNestLevel;
992
993// ThreadAndVectorNestLevel determines on which nested level parallelization
994// happens.
995// - Rank is Kokkos::Rank<TotalNestLevel, Iter>
996// - TotalNestLevel is the total number of loop nests
997// - Iter is whether to go forward or backward through ranks (i.e. the
998// iteration order for MDRangePolicy)
999// - ThreadAndVector determines whether both vector and thread parallelism is
1000// in use
1001template <typename Rank, typename ExecSpace,
1002 TeamMDRangeThreadAndVector ThreadAndVector>
1003struct ThreadAndVectorNestLevel;
1004
// Empty tag type.
// NOTE(review): presumably passed as the reduction value to md_parallel_impl
// when no reduction is performed -- confirm at the call sites.
1005struct NoReductionTag {};
1006
// Declared here, defined elsewhere.
1007template <typename Rank, typename TeamMDPolicy, typename Lambda,
1008 typename ReductionValueType>
1009KOKKOS_INLINE_FUNCTION void md_parallel_impl(TeamMDPolicy const& policy,
1010 Lambda const& lambda,
1011 ReductionValueType&& val);
1012} // namespace Impl
1013
// Multi-dimensional team policy that parallelises over threads only
// (par_thread = ParThread, par_vector = NotParVector).
// The primary template is only declared; the partial specialisation below
// handles Rank<N, OuterDir, InnerDir>.
1014template <typename Rank, typename TeamHandle>
1015struct TeamThreadMDRange;
1016
1017template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1018struct TeamThreadMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1019 using NestLevelType = int;
1020 using BoundaryType = int;
1021 using TeamHandleType = TeamHandle;
1022 using ExecutionSpace = typename TeamHandleType::execution_space;
1023 using ArrayLayout = typename ExecutionSpace::array_layout;
1024
1025 static constexpr NestLevelType total_nest_level =
1026 Rank<N, OuterDir, InnerDir>::rank;
1027 static constexpr Iterate iter = OuterDir;
1028 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1029 static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector;
1030
// Iterate::Default is resolved to the outer iteration pattern selected for
// the execution space's array layout; any other value is used as given.
1031 static constexpr Iterate direction =
1032 OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1033 ArrayLayout>::outer_iteration_pattern
1034 : iter;
1035
// Takes the team handle plus exactly one extent per nest level; every extent
// is converted to BoundaryType (int). Only a reference to the team handle is
// stored, so the handle must outlive this object.
1036 template <class... Args>
1037 KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args)
1038 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1039 static_assert(sizeof...(Args) == total_nest_level);
1040 }
1041
1042 TeamHandleType const& team;
1043 BoundaryType boundaries[total_nest_level];
1044};
1045
// Deduction guide: the rank is the number of extents passed and the
// iteration direction is Iterate::Default.
1046template <typename TeamHandle, typename... Args>
1047KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle const&, Args&&...)
1048 -> TeamThreadMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1049
// Multi-dimensional team policy that parallelises over vector lanes only
// (par_thread = NotParThread, par_vector = ParVector).
// The primary template is only declared; the partial specialisation below
// handles Rank<N, OuterDir, InnerDir>.
1050template <typename Rank, typename TeamHandle>
1051struct ThreadVectorMDRange;
1052
1053template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1054struct ThreadVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1055 using NestLevelType = int;
1056 using BoundaryType = int;
1057 using TeamHandleType = TeamHandle;
1058 using ExecutionSpace = typename TeamHandleType::execution_space;
1059 using ArrayLayout = typename ExecutionSpace::array_layout;
1060
1061 static constexpr NestLevelType total_nest_level =
1062 Rank<N, OuterDir, InnerDir>::rank;
1063 static constexpr Iterate iter = OuterDir;
1064 static constexpr auto par_thread = Impl::TeamMDRangeParThread::NotParThread;
1065 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1066
// Iterate::Default is resolved to the outer iteration pattern selected for
// the execution space's array layout; any other value is used as given.
1067 static constexpr Iterate direction =
1068 OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector<
1069 ArrayLayout>::outer_iteration_pattern
1070 : iter;
1071
// Takes the team handle plus exactly one extent per nest level; every extent
// is converted to BoundaryType (int). Only a reference to the team handle is
// stored, so the handle must outlive this object.
1072 template <class... Args>
1073 KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_,
1074 Args&&... args)
1075 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1076 static_assert(sizeof...(Args) == total_nest_level);
1077 }
1078
1079 TeamHandleType const& team;
1080 BoundaryType boundaries[total_nest_level];
1081};
1082
// Deduction guide: the rank is the number of extents passed and the
// iteration direction is Iterate::Default.
1083template <typename TeamHandle, typename... Args>
1084KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle const&, Args&&...)
1085 -> ThreadVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1086
1087template <typename Rank, typename TeamHandle>
1088struct TeamVectorMDRange;
1089
1090template <unsigned N, Iterate OuterDir, Iterate InnerDir, typename TeamHandle>
1091struct TeamVectorMDRange<Rank<N, OuterDir, InnerDir>, TeamHandle> {
1092 using NestLevelType = int;
1093 using BoundaryType = int;
1094 using TeamHandleType = TeamHandle;
1095 using ExecutionSpace = typename TeamHandleType::execution_space;
1096 using ArrayLayout = typename ExecutionSpace::array_layout;
1097
1098 static constexpr NestLevelType total_nest_level =
1099 Rank<N, OuterDir, InnerDir>::rank;
1100 static constexpr Iterate iter = OuterDir;
1101 static constexpr auto par_thread = Impl::TeamMDRangeParThread::ParThread;
1102 static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector;
1103
1104 static constexpr Iterate direction =
1105 iter == Iterate::Default ? Impl::layout_iterate_type_selector<
1106 ArrayLayout>::outer_iteration_pattern
1107 : iter;
1108
1109 template <class... Args>
1110 KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_,
1111 Args&&... args)
1112 : team(team_), boundaries{static_cast<BoundaryType>(args)...} {
1113 static_assert(sizeof...(Args) == total_nest_level);
1114 }
1115
1116 TeamHandleType const& team;
1117 BoundaryType boundaries[total_nest_level];
1118};
1119
1120template <typename TeamHandle, typename... Args>
1121KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle const&, Args&&...)
1122 -> TeamVectorMDRange<Rank<sizeof...(Args), Iterate::Default>, TeamHandle>;
1123
1124template <typename Rank, typename TeamHandle, typename Lambda,
1125 typename ReducerValueType>
1126KOKKOS_INLINE_FUNCTION void parallel_reduce(
1127 TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1128 ReducerValueType& val) {
1129 static_assert(
1130 !std::is_array_v<ReducerValueType> &&
1131 !std::is_pointer_v<ReducerValueType> &&
1132 !Kokkos::is_reducer_v<ReducerValueType>,
1133 "Only scalar return types are allowed!");
1134
1135 val = ReducerValueType{};
1136 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1137 policy.team.team_reduce(
1138 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1139}
1140
1141template <typename Rank, typename TeamHandle, typename Lambda>
1142KOKKOS_INLINE_FUNCTION void parallel_for(
1143 TeamThreadMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1144 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1145}
1146
1147template <typename Rank, typename TeamHandle, typename Lambda,
1148 typename ReducerValueType>
1149KOKKOS_INLINE_FUNCTION void parallel_reduce(
1150 ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1151 ReducerValueType& val) {
1152 static_assert(
1153 !std::is_array_v<ReducerValueType> &&
1154 !std::is_pointer_v<ReducerValueType> &&
1155 !Kokkos::is_reducer_v<ReducerValueType>,
1156 "Only a scalar return types are allowed!");
1157
1158 val = ReducerValueType{};
1159 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1160 if constexpr (false
1161#ifdef KOKKOS_ENABLE_CUDA
1162 || std::is_same_v<typename TeamHandle::execution_space,
1163 Kokkos::Cuda>
1164#elif defined(KOKKOS_ENABLE_HIP)
1165 || std::is_same_v<typename TeamHandle::execution_space,
1166 Kokkos::HIP>
1167#elif defined(KOKKOS_ENABLE_SYCL)
1168 || std::is_same_v<typename TeamHandle::execution_space,
1169 Kokkos::SYCL>
1170#endif
1171 )
1172 policy.team.vector_reduce(
1173 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1174 val});
1175}
1176
1177template <typename Rank, typename TeamHandle, typename Lambda>
1178KOKKOS_INLINE_FUNCTION void parallel_for(
1179 ThreadVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1180 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1181}
1182
1183template <typename Rank, typename TeamHandle, typename Lambda,
1184 typename ReducerValueType>
1185KOKKOS_INLINE_FUNCTION void parallel_reduce(
1186 TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda,
1187 ReducerValueType& val) {
1188 static_assert(
1189 !std::is_array_v<ReducerValueType> &&
1190 !std::is_pointer_v<ReducerValueType> &&
1191 !Kokkos::is_reducer_v<ReducerValueType>,
1192 "Only a scalar return types are allowed!");
1193
1194 val = ReducerValueType{};
1195 Impl::md_parallel_impl<Rank>(policy, lambda, val);
1196 if constexpr (false
1197#ifdef KOKKOS_ENABLE_CUDA
1198 || std::is_same_v<typename TeamHandle::execution_space,
1199 Kokkos::Cuda>
1200#elif defined(KOKKOS_ENABLE_HIP)
1201 || std::is_same_v<typename TeamHandle::execution_space,
1202 Kokkos::HIP>
1203#elif defined(KOKKOS_ENABLE_SYCL)
1204 || std::is_same_v<typename TeamHandle::execution_space,
1205 Kokkos::SYCL>
1206#endif
1207 )
1208 policy.team.vector_reduce(
1209 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{
1210 val});
1211 policy.team.team_reduce(
1212 Kokkos::Sum<ReducerValueType, typename TeamHandle::execution_space>{val});
1213}
1214
1215template <typename Rank, typename TeamHandle, typename Lambda>
1216KOKKOS_INLINE_FUNCTION void parallel_for(
1217 TeamVectorMDRange<Rank, TeamHandle> const& policy, Lambda const& lambda) {
1218 Impl::md_parallel_impl<Rank>(policy, lambda, Impl::NoReductionTag());
1219}
1220
namespace Impl {

// Chooses the name reported for a parallel construct.
//
// If the caller-supplied label is non-empty it is used unchanged. Otherwise a
// default name is built from the functor type (and, when TagType is not void,
// from the tag type as well, separated by '/'). The type names come from
// TypeInfo<>::name() when KOKKOS_ENABLE_IMPL_TYPEINFO is defined and from
// typeid(...).name() otherwise.
//
// The label is held by reference (`label_ref`), not copied: the string passed
// to the constructor must outlive this object.
template <typename FunctorType, typename TagType,
          bool HasTag = !std::is_void_v<TagType>>
struct ParallelConstructName;

// Specialization for a non-void work tag: default name is "<functor>/<tag>".
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, true> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // The default name is only computed when it will actually be needed.
    if (label.empty()) {
#ifdef KOKKOS_ENABLE_IMPL_TYPEINFO
      default_name =
          std::string(TypeInfo<std::remove_const_t<FunctorType>>::name()) +
          "/" + std::string(TypeInfo<TagType>::name());
#else
      default_name = std::string(typeid(FunctorType).name()) + "/" +
                     typeid(TagType).name();
#endif
    }
  }
  // Returns the user label if non-empty, else the generated default name.
  // Const-qualified so the accessor is usable on const objects.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};

// Specialization for a void work tag: default name is the functor type name.
template <typename FunctorType, typename TagType>
struct ParallelConstructName<FunctorType, TagType, false> {
  ParallelConstructName(std::string const& label) : label_ref(label) {
    // The default name is only computed when it will actually be needed.
    if (label.empty()) {
#ifdef KOKKOS_ENABLE_IMPL_TYPEINFO
      default_name = TypeInfo<std::remove_const_t<FunctorType>>::name();
#else
      default_name = typeid(FunctorType).name();
#endif
    }
  }
  // Returns the user label if non-empty, else the generated default name.
  // Const-qualified so the accessor is usable on const objects.
  std::string const& get() const {
    return (label_ref.empty()) ? default_name : label_ref;
  }
  std::string const& label_ref;
  std::string default_name;
};

}  // namespace Impl
1267
1268} // namespace Kokkos
1269
1270namespace Kokkos {
1271
1272namespace Impl {
1273
1274template <class PatternTag, class... Args>
1275struct PatternImplSpecializationFromTag;
1276
1277template <class... Args>
1278struct PatternImplSpecializationFromTag<Kokkos::ParallelForTag, Args...>
1279 : type_identity<ParallelFor<Args...>> {};
1280
1281template <class... Args>
1282struct PatternImplSpecializationFromTag<Kokkos::ParallelReduceTag, Args...>
1283 : type_identity<ParallelReduce<Args...>> {};
1284
1285template <class... Args>
1286struct PatternImplSpecializationFromTag<Kokkos::ParallelScanTag, Args...>
1287 : type_identity<ParallelScan<Args...>> {};
1288
1289template <class PatternImpl>
1290struct PatternTagFromImplSpecialization;
1291
1292template <class... Args>
1293struct PatternTagFromImplSpecialization<ParallelFor<Args...>>
1294 : type_identity<ParallelForTag> {};
1295
1296template <class... Args>
1297struct PatternTagFromImplSpecialization<ParallelReduce<Args...>>
1298 : type_identity<ParallelReduceTag> {};
1299
1300template <class... Args>
1301struct PatternTagFromImplSpecialization<ParallelScan<Args...>>
1302 : type_identity<ParallelScanTag> {};
1303
1304} // end namespace Impl
1305
1306} // namespace Kokkos
1307#endif /* #define KOKKOS_EXECPOLICY_HPP */
Implementation of the ParallelFor operator that has a partial specialization for the device.
Implementation detail of parallel_reduce.
Implementation detail of parallel_scan.
Execution policy for work over a range of an integral type.
RangePolicy(const typename traits::execution_space &work_space, const IndexType1 work_begin, const IndexType2 work_end)
Total range.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end)
Total range.
RangePolicy(const IndexType1 work_begin, const IndexType2 work_end, const ChunkSize chunk_size)
Total range.
RangePolicy & set_chunk_size(int chunk_size)
Set chunk_size to a discrete value.
member_type chunk_size() const
Return chunk_size.
Execution policy for parallel work over a league of teams of threads.
TeamPolicy(int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the default instance of the execution space.
TeamPolicy(const typename traits::execution_space &space_, int league_size_request, int team_size_request, int vector_length_request=1)
Construct policy with the given instance of the execution space.
ScopeGuard Some user scope issues have been identified with some Kokkos::finalize calls; ScopeGuard a...
Parallel execution of a functor calls the functor once with each member of the execution policy.
KOKKOS_INLINE_FUNCTION JoinOp::value_type team_reduce(const typename JoinOp::value_type, const JoinOp &) const
Intra-team reduction. Returns join of all values of the team members.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value, Type *const global_accum) const
Intra-team exclusive prefix sum with team_rank() ordering with intra-team non-deterministic ordering ...
KOKKOS_INLINE_FUNCTION int team_size() const
Number of threads in this team.
KOKKOS_INLINE_FUNCTION traits::execution_space::scratch_memory_space team_shmem() const
Handle to the currently executing team shared scratch memory.
KOKKOS_INLINE_FUNCTION int team_rank() const
Rank of this thread within this team.
KOKKOS_INLINE_FUNCTION int league_size() const
Number of teams in the league.
KOKKOS_INLINE_FUNCTION int league_rank() const
Rank of this team within the league of teams.
KOKKOS_INLINE_FUNCTION void team_barrier() const
Barrier among the threads of this team.
KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const
Intra-team exclusive prefix sum with team_rank() ordering.
Subrange for a partition's rank and size.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.