Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_radixSort.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_RADIXSORT_HPP
11#define TPETRA_DETAILS_RADIXSORT_HPP
12
13#include "TpetraCore_config.h"
14#include "Kokkos_Macros.hpp"
15
16namespace Tpetra
17{
18namespace Details
19{
20
32template<typename KeyType, typename ValueType, typename IndexType>
33KOKKOS_INLINE_FUNCTION void
34radixSortKeysAndValues(KeyType* keys, KeyType* keysAux, ValueType* values, ValueType* valuesAux, IndexType n, IndexType upperBound)
35{
36 if(n <= 1)
37 return;
38 KeyType mask = 0xF;
39 bool inAux = false;
40 //maskPos counts the low bit index of mask (0, 4, 8, ...)
41 IndexType maskPos = 0;
42 //Count number of bits required to sort (8 * sizeof(KeyType) - lzcnt(maxKey - minKey))
43 IndexType sortBits = 0;
44 while(upperBound)
45 {
46 upperBound >>= 1;
47 sortBits++;
48 }
49 for(IndexType s = 0; s < (sortBits + 3) / 4; s++)
50 {
51 //Count the number of elements in each bucket
52 IndexType count[16] = {0};
53 IndexType offset[17] = {0};
54 if(!inAux)
55 {
56 for(IndexType i = 0; i < n; i++)
57 {
58 count[(keys[i] & mask) >> maskPos]++;
59 }
60 }
61 else
62 {
63 for(IndexType i = 0; i < n; i++)
64 {
65 count[(keysAux[i] & mask) >> maskPos]++;
66 }
67 }
68 //get offset as the prefix sum for count
69 for(IndexType i = 0; i < 16; i++)
70 {
71 offset[i + 1] = offset[i] + count[i];
72 }
73 //now for each element in [lo, hi), move it to its offset in the other buffer
74 //this branch should be ok because whichBuf is the same on all threads
75 if(!inAux)
76 {
77 //copy from *Over to *Aux
78 for(IndexType i = 0; i < n; i++)
79 {
80 IndexType bucket = (keys[i] & mask) >> maskPos;
81 keysAux[offset[bucket + 1] - count[bucket]] = keys[i];
82 valuesAux[offset[bucket + 1] - count[bucket]] = values[i];
83 count[bucket]--;
84 }
85 }
86 else
87 {
88 //copy from *Aux to *Over
89 for(IndexType i = 0; i < n; i++)
90 {
91 IndexType bucket = (keysAux[i] & mask) >> maskPos;
92 keys[offset[bucket + 1] - count[bucket]] = keysAux[i];
93 values[offset[bucket + 1] - count[bucket]] = valuesAux[i];
94 count[bucket]--;
95 }
96 }
97 inAux = !inAux;
98 mask = mask << 4;
99 maskPos += 4;
100 }
101 if(inAux)
102 {
103 //need to deep copy from aux arrays to main
104 for(IndexType i = 0; i < n; i++)
105 {
106 keys[i] = keysAux[i];
107 values[i] = valuesAux[i];
108 }
109 }
110}
111
112} //namespace Details
113} //namespace Tpetra
114
115#endif //include guard
116
Nonmember function that computes a residual Computes R = B - A * X.
KOKKOS_INLINE_FUNCTION void radixSortKeysAndValues(KeyType *keys, KeyType *keysAux, ValueType *values, ValueType *valuesAux, IndexType n, IndexType upperBound)
Radix sort the input array keys, and permute values identically to the keys.
Namespace Tpetra contains the class and methods constituting the Tpetra library.