Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_BitManipulation.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_BIT_MANIPULATION_HPP
18#define KOKKOS_BIT_MANIPULATION_HPP
19
20#include <Kokkos_Macros.hpp>
21#include <Kokkos_NumericTraits.hpp>
22#include <climits> // CHAR_BIT
23#include <cstring> //memcpy
24#include <type_traits>
25
26namespace Kokkos::Impl {
27
28template <class T>
29KOKKOS_FUNCTION constexpr T byteswap_fallback(T x) {
30 if constexpr (sizeof(T) > 1) {
31 using U = std::make_unsigned_t<T>;
32
33 size_t shift = CHAR_BIT * (sizeof(T) - 1);
34
35 U lo_mask = static_cast<unsigned char>(~0);
36 U hi_mask = lo_mask << shift;
37
38 U val = x;
39
40 for (size_t i = 0; i < sizeof(T) / 2; ++i) {
41 U lo_val = val & lo_mask;
42 U hi_val = val & hi_mask;
43
44 val = (val & ~lo_mask) | (hi_val >> shift);
45 val = (val & ~hi_mask) | (lo_val << shift);
46
47 lo_mask <<= CHAR_BIT;
48 hi_mask >>= CHAR_BIT;
49
50 shift -= 2 * CHAR_BIT;
51 }
52 return val;
53 }
54 // sizeof(T) == 1
55 return x;
56}
57
58template <class T>
59KOKKOS_FUNCTION constexpr int countl_zero_fallback(T x) {
60 // From Hacker's Delight (2nd edition) section 5-3
61 unsigned int y = 0;
62 using ::Kokkos::Experimental::digits_v;
63 int n = digits_v<T>;
64 int c = digits_v<T> / 2;
65 do {
66 y = x >> c;
67 if (y != 0) {
68 n -= c;
69 x = y;
70 }
71 c >>= 1;
72 } while (c != 0);
73 return n - static_cast<int>(x);
74}
75
76template <class T>
77KOKKOS_FUNCTION constexpr int countr_zero_fallback(T x) {
78 using ::Kokkos::Experimental::digits_v;
79 return digits_v<T> - countl_zero_fallback(static_cast<T>(
80 static_cast<T>(~x) & static_cast<T>(x - 1)));
81}
82
83template <class T>
84KOKKOS_FUNCTION constexpr int popcount_fallback(T x) {
85 int c = 0;
86 for (; x != 0; x &= x - 1) {
87 ++c;
88 }
89 return c;
90}
91
92template <class T>
93inline constexpr bool is_standard_unsigned_integer_type_v =
94 std::is_same_v<T, unsigned char> || std::is_same_v<T, unsigned short> ||
95 std::is_same_v<T, unsigned int> || std::is_same_v<T, unsigned long> ||
96 std::is_same_v<T, unsigned long long>;
97
98} // namespace Kokkos::Impl
99
100namespace Kokkos {
101
102//<editor-fold desc="[bit.cast], bit_cast">
103#if defined(KOKKOS_ENABLE_SYCL) && defined(__INTEL_LLVM_COMPILER) && \
104 __INTEL_LLVM_COMPILER < 20240000
105using sycl::detail::bit_cast;
106#else
107template <class To, class From>
108KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
109 std::is_trivially_copyable_v<To> &&
110 std::is_trivially_copyable_v<From>,
111 To>
112bit_cast(From const& from) noexcept {
113#if defined(KOKKOS_ENABLE_SYCL) && defined(__INTEL_LLVM_COMPILER) && \
114 __INTEL_LLVM_COMPILER >= 20240000
115 return sycl::bit_cast<To>(from);
116#else
117 To to;
118 memcpy(static_cast<void*>(&to), static_cast<const void*>(&from), sizeof(To));
119 return to;
120#endif
121}
122#endif
123//</editor-fold>
124
125//<editor-fold desc="[bit.byteswap], byteswap">
126template <class T>
127KOKKOS_FUNCTION constexpr std::enable_if_t<std::is_integral_v<T>, T> byteswap(
128 T value) noexcept {
129 return Impl::byteswap_fallback(value);
130}
131//</editor-fold>
132
133//<editor-fold desc="[bit.count], counting">
134template <class T>
135KOKKOS_FUNCTION constexpr std::enable_if_t<
136 Impl::is_standard_unsigned_integer_type_v<T>, int>
137countl_zero(T x) noexcept {
138 using ::Kokkos::Experimental::digits_v;
139 if (x == 0) return digits_v<T>;
140 // TODO use compiler intrinsics when available
141 return Impl::countl_zero_fallback(x);
142}
143
144template <class T>
145KOKKOS_FUNCTION constexpr std::enable_if_t<
146 Impl::is_standard_unsigned_integer_type_v<T>, int>
147countl_one(T x) noexcept {
148 using ::Kokkos::Experimental::digits_v;
149 using ::Kokkos::Experimental::finite_max_v;
150 if (x == finite_max_v<T>) return digits_v<T>;
151 return countl_zero(static_cast<T>(~x));
152}
153
154template <class T>
155KOKKOS_FUNCTION constexpr std::enable_if_t<
156 Impl::is_standard_unsigned_integer_type_v<T>, int>
157countr_zero(T x) noexcept {
158 using ::Kokkos::Experimental::digits_v;
159 if (x == 0) return digits_v<T>;
160 // TODO use compiler intrinsics when available
161 return Impl::countr_zero_fallback(x);
162}
163
164template <class T>
165KOKKOS_FUNCTION constexpr std::enable_if_t<
166 Impl::is_standard_unsigned_integer_type_v<T>, int>
167countr_one(T x) noexcept {
168 using ::Kokkos::Experimental::digits_v;
169 using ::Kokkos::Experimental::finite_max_v;
170 if (x == finite_max_v<T>) return digits_v<T>;
171 return countr_zero(static_cast<T>(~x));
172}
173
174template <class T>
175KOKKOS_FUNCTION constexpr std::enable_if_t<
176 Impl::is_standard_unsigned_integer_type_v<T>, int>
177popcount(T x) noexcept {
178 if (x == 0) return 0;
179 // TODO use compiler intrinsics when available
180 return Impl::popcount_fallback(x);
181}
182//</editor-fold>
183
184//<editor-fold desc="[bit.pow.two], integral powers of 2">
185template <class T>
186KOKKOS_FUNCTION constexpr std::enable_if_t<
187 Impl::is_standard_unsigned_integer_type_v<T>, bool>
188has_single_bit(T x) noexcept {
189 return x != 0 && (((x & (x - 1)) == 0));
190}
191
192template <class T>
193KOKKOS_FUNCTION constexpr std::enable_if_t<
194 Impl::is_standard_unsigned_integer_type_v<T>, T>
195bit_ceil(T x) noexcept {
196 if (x <= 1) return 1;
197 using ::Kokkos::Experimental::digits_v;
198 return T{1} << (digits_v<T> - countl_zero(static_cast<T>(x - 1)));
199}
200
201template <class T>
202KOKKOS_FUNCTION constexpr std::enable_if_t<
203 Impl::is_standard_unsigned_integer_type_v<T>, T>
204bit_floor(T x) noexcept {
205 if (x == 0) return 0;
206 using ::Kokkos::Experimental::digits_v;
207 return T{1} << (digits_v<T> - 1 - countl_zero(x));
208}
209
210template <class T>
211KOKKOS_FUNCTION constexpr std::enable_if_t<
212 Impl::is_standard_unsigned_integer_type_v<T>, T>
213bit_width(T x) noexcept {
214 if (x == 0) return 0;
215 using ::Kokkos::Experimental::digits_v;
216 return digits_v<T> - countl_zero(x);
217}
218//</editor-fold>
219
220//<editor-fold desc="[bit.rotate], rotating">
221template <class T>
222[[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
223 Impl::is_standard_unsigned_integer_type_v<T>, T>
224rotl(T x, int s) noexcept {
225 using Experimental::digits_v;
226 constexpr auto dig = digits_v<T>;
227 int const rem = s % dig;
228 if (rem == 0) return x;
229 if (rem > 0) return (x << rem) | (x >> ((dig - rem) % dig));
230 return (x >> -rem) | (x << ((dig + rem) % dig)); // rotr(x, -rem)
231}
232
233template <class T>
234[[nodiscard]] KOKKOS_FUNCTION constexpr std::enable_if_t<
235 Impl::is_standard_unsigned_integer_type_v<T>, T>
236rotr(T x, int s) noexcept {
237 using Experimental::digits_v;
238 constexpr auto dig = digits_v<T>;
239 int const rem = s % dig;
240 if (rem == 0) return x;
241 if (rem > 0) return (x >> rem) | (x << ((dig - rem) % dig));
242 return (x << -rem) | (x >> ((dig + rem) % dig)); // rotl(x, -rem)
243}
244//</editor-fold>
245
246} // namespace Kokkos
247
248namespace Kokkos::Impl {
249
250#if defined(KOKKOS_COMPILER_CLANG) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \
251 defined(KOKKOS_COMPILER_GNU)
252#define KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
253#endif
254
255template <class T>
256KOKKOS_IMPL_DEVICE_FUNCTION T byteswap_builtin_device(T x) noexcept {
257 return byteswap_fallback(x);
258}
259
260template <class T>
261KOKKOS_IMPL_HOST_FUNCTION T byteswap_builtin_host(T x) noexcept {
262#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
263 if constexpr (sizeof(T) == 1) {
264 return x;
265 } else if constexpr (sizeof(T) == 2) {
266 return __builtin_bswap16(x);
267 } else if constexpr (sizeof(T) == 4) {
268 return __builtin_bswap32(x);
269 } else if constexpr (sizeof(T) == 8) {
270 return __builtin_bswap64(x);
271 } else if constexpr (sizeof(T) == 16) {
272#if defined(__has_builtin)
273#if __has_builtin(__builtin_bswap128)
274 return __builtin_bswap128(x);
275#endif
276#endif
277 return (__builtin_bswap64(x >> 64) |
278 (static_cast<T>(__builtin_bswap64(x)) << 64));
279 }
280#endif
281
282 return byteswap_fallback(x);
283}
284
285template <class T>
286KOKKOS_IMPL_DEVICE_FUNCTION
287 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
288 countl_zero_builtin_device(T x) noexcept {
289#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
290 if constexpr (sizeof(T) == sizeof(long long int))
291 return __clzll(reinterpret_cast<long long int&>(x));
292 if constexpr (sizeof(T) == sizeof(int))
293 return __clz(reinterpret_cast<int&>(x));
294 using ::Kokkos::Experimental::digits_v;
295 constexpr int shift = digits_v<unsigned int> - digits_v<T>;
296 return __clz(x) - shift;
297#elif defined(KOKKOS_ENABLE_SYCL)
298 return sycl::clz(x);
299#else
300 return countl_zero_fallback(x);
301#endif
302}
303
304template <class T>
305KOKKOS_IMPL_HOST_FUNCTION
306 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
307 countl_zero_builtin_host(T x) noexcept {
308 using ::Kokkos::Experimental::digits_v;
309 if (x == 0) return digits_v<T>;
310#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
311 if constexpr (std::is_same_v<T, unsigned long long>) {
312 return __builtin_clzll(x);
313 } else if constexpr (std::is_same_v<T, unsigned long>) {
314 return __builtin_clzl(x);
315 } else if constexpr (std::is_same_v<T, unsigned int>) {
316 return __builtin_clz(x);
317 } else {
318 constexpr int shift = digits_v<unsigned int> - digits_v<T>;
319 return __builtin_clz(x) - shift;
320 }
321#else
322 return countl_zero_fallback(x);
323#endif
324}
325
326template <class T>
327KOKKOS_IMPL_DEVICE_FUNCTION
328 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
329 countr_zero_builtin_device(T x) noexcept {
330 using ::Kokkos::Experimental::digits_v;
331 if (x == 0) return digits_v<T>;
332#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
333 if constexpr (sizeof(T) == sizeof(long long int))
334 return __ffsll(reinterpret_cast<long long int&>(x)) - 1;
335 return __ffs(reinterpret_cast<int&>(x)) - 1;
336#elif defined(KOKKOS_ENABLE_SYCL)
337 return sycl::ctz(x);
338#else
339 return countr_zero_fallback(x);
340#endif
341}
342
343template <class T>
344KOKKOS_IMPL_HOST_FUNCTION
345 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
346 countr_zero_builtin_host(T x) noexcept {
347 using ::Kokkos::Experimental::digits_v;
348 if (x == 0) return digits_v<T>;
349#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
350 if constexpr (std::is_same_v<T, unsigned long long>) {
351 return __builtin_ctzll(x);
352 } else if constexpr (std::is_same_v<T, unsigned long>) {
353 return __builtin_ctzl(x);
354 } else {
355 return __builtin_ctz(x);
356 }
357#else
358 return countr_zero_fallback(x);
359#endif
360}
361
362template <class T>
363KOKKOS_IMPL_DEVICE_FUNCTION
364 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
365 popcount_builtin_device(T x) noexcept {
366#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
367 if constexpr (sizeof(T) == sizeof(long long int)) return __popcll(x);
368 return __popc(x);
369#elif defined(KOKKOS_ENABLE_SYCL)
370 return sycl::popcount(x);
371#else
372 return popcount_fallback(x);
373#endif
374}
375
376template <class T>
377KOKKOS_IMPL_HOST_FUNCTION
378 std::enable_if_t<is_standard_unsigned_integer_type_v<T>, int>
379 popcount_builtin_host(T x) noexcept {
380#ifdef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
381 if constexpr (std::is_same_v<T, unsigned long long>) {
382 return __builtin_popcountll(x);
383 } else if constexpr (std::is_same_v<T, unsigned long>) {
384 return __builtin_popcountl(x);
385 } else {
386 return __builtin_popcount(x);
387 }
388#else
389 return popcount_fallback(x);
390#endif
391}
392
393#undef KOKKOS_IMPL_USE_GCC_BUILT_IN_FUNCTIONS
394
395} // namespace Kokkos::Impl
396
397namespace Kokkos::Experimental {
398
399template <class To, class From>
400KOKKOS_FUNCTION std::enable_if_t<sizeof(To) == sizeof(From) &&
401 std::is_trivially_copyable_v<To> &&
402 std::is_trivially_copyable_v<From>,
403 To>
404bit_cast_builtin(From const& from) noexcept {
405 // qualify the call to avoid ADL
406 return Kokkos::bit_cast<To>(from); // no benefit to call the _builtin variant
407}
408
409template <class T>
410KOKKOS_FUNCTION std::enable_if_t<std::is_integral_v<T>, T> byteswap_builtin(
411 T x) noexcept {
412 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::byteswap_builtin_device(x);))
413 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::byteswap_builtin_host(x);))
414}
415
416template <class T>
417KOKKOS_FUNCTION std::enable_if_t<
418 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
419countl_zero_builtin(T x) noexcept {
420 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countl_zero_builtin_device(x);))
421 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countl_zero_builtin_host(x);))
422}
423
424template <class T>
425KOKKOS_FUNCTION std::enable_if_t<
426 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
427countl_one_builtin(T x) noexcept {
428 if (x == finite_max_v<T>) return digits_v<T>;
429 return countl_zero_builtin(static_cast<T>(~x));
430}
431
432template <class T>
433KOKKOS_FUNCTION std::enable_if_t<
434 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
435countr_zero_builtin(T x) noexcept {
436 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::countr_zero_builtin_device(x);))
437 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::countr_zero_builtin_host(x);))
438}
439
440template <class T>
441KOKKOS_FUNCTION std::enable_if_t<
442 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
443countr_one_builtin(T x) noexcept {
444 if (x == finite_max_v<T>) return digits_v<T>;
445 return countr_zero_builtin(static_cast<T>(~x));
446}
447
448template <class T>
449KOKKOS_FUNCTION std::enable_if_t<
450 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, int>
451popcount_builtin(T x) noexcept {
452 KOKKOS_IF_ON_DEVICE((return ::Kokkos::Impl::popcount_builtin_device(x);))
453 KOKKOS_IF_ON_HOST((return ::Kokkos::Impl::popcount_builtin_host(x);))
454}
455
456template <class T>
457KOKKOS_FUNCTION std::enable_if_t<
458 ::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, bool>
459has_single_bit_builtin(T x) noexcept {
460 return has_single_bit(x); // no benefit to call the _builtin variant
461}
462
463template <class T>
464KOKKOS_FUNCTION
465 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
466 bit_ceil_builtin(T x) noexcept {
467 if (x <= 1) return 1;
468 return T{1} << (digits_v<T> - countl_zero_builtin(static_cast<T>(x - 1)));
469}
470
471template <class T>
472KOKKOS_FUNCTION
473 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
474 bit_floor_builtin(T x) noexcept {
475 if (x == 0) return 0;
476 return T{1} << (digits_v<T> - 1 - countl_zero_builtin(x));
477}
478
479template <class T>
480KOKKOS_FUNCTION
481 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
482 bit_width_builtin(T x) noexcept {
483 if (x == 0) return 0;
484 return digits_v<T> - countl_zero_builtin(x);
485}
486
487template <class T>
488[[nodiscard]] KOKKOS_FUNCTION
489 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
490 rotl_builtin(T x, int s) noexcept {
491 return rotl(x, s); // no benefit to call the _builtin variant
492}
493
494template <class T>
495[[nodiscard]] KOKKOS_FUNCTION
496 std::enable_if_t<::Kokkos::Impl::is_standard_unsigned_integer_type_v<T>, T>
497 rotr_builtin(T x, int s) noexcept {
498 return rotr(x, s); // no benefit to call the _builtin variant
499}
500
501} // namespace Kokkos::Experimental
502
503#endif
ScopeGuard Some user scope issues have been identified with some Kokkos::finalize calls; ScopeGuard a...