Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_shortSort.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_SHORTSORT_HPP
11#define TPETRA_DETAILS_SHORTSORT_HPP
12
20
21#include "TpetraCore_config.h"
22#include "Kokkos_Macros.hpp"
23#include <type_traits>
24
25namespace Tpetra {
26namespace Details {
27
28// Make sure that the macro defined below wasn't defined somewhere else.
29#ifdef TPETRA_DETAILS_SWAP_KEYSANDVALUES
30# error "The TPETRA_DETAILS_SWAP_KEYSANDVALUES macro is already defined."
31#endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
32
53#define TPETRA_DETAILS_SWAP_KEYSANDVALUES( i, j ) \
54 if (keys[i] > keys[j]) { \
55 const KeyType tmpKey (keys[i]); \
56 keys[i] = keys[j]; \
57 keys[j] = tmpKey; \
58 const ValueType tmpVal (values[i]); \
59 values[i] = values[j]; \
60 values[j] = tmpVal; \
61 }
62
63// Make sure that the macro defined below wasn't defined somewhere else.
64#ifdef TPETRA_DETAILS_SWAP_KEYS
65# error "The TPETRA_DETAILS_SWAP_KEYS macro is already defined."
66#endif // TPETRA_DETAILS_SWAP_KEYSANDVALUES
67
85#define TPETRA_DETAILS_SWAP_KEYS( i, j ) \
86 if (keys[i] > keys[j]) { \
87 const KeyType tmpKey (keys[i]); \
88 keys[i] = keys[j]; \
89 keys[j] = tmpKey; \
90 }
91
102template<class KeyType, class ValueType>
103KOKKOS_FUNCTION void
104shortSortKeysAndValues_2 (KeyType keys[2], ValueType values[2])
105{
106 // Since this function takes a constant number of entries, I use a
107 // sorting network here. For 2 entries, the sorting network is
108 // nearly trivial.
110}
111
118template<class KeyType>
119KOKKOS_FUNCTION void
120shortSortKeys_2 (KeyType keys[2])
121{
122 // Since this function takes a constant number of entries, I use a
123 // sorting network here. For 2 entries, the sorting network is
124 // nearly trivial.
126}
127
138template<class KeyType, class ValueType>
139KOKKOS_FUNCTION void
140shortSortKeysAndValues_3 (KeyType keys[3], ValueType values[3])
141{
142 // Since this function takes a constant number of entries, I use a
143 // sorting network here. To make the network, I used the generator
144 // at
145 //
146 // http://pages.ripco.net/~jgamble/nw.html
147 //
148 // "Best" algorithm in this case is the Bose-Nelson Algorithm.
152}
153
160template<class KeyType>
161KOKKOS_FUNCTION void
162shortSortKeys_3 (KeyType keys[3])
163{
164 // Since this function takes a constant number of entries, I use a
165 // sorting network here. To make the network, I used the generator
166 // at
167 //
168 // http://pages.ripco.net/~jgamble/nw.html
169 //
170 // "Best" algorithm in this case is the Bose-Nelson Algorithm.
174}
175
186template<class KeyType, class ValueType>
187KOKKOS_FUNCTION void
188shortSortKeysAndValues_4 (KeyType keys[4], ValueType values[4])
189{
190 // Since this function takes a constant number of entries, I use a
191 // sorting network here. To make the network, I used the generator
192 // at
193 //
194 // http://pages.ripco.net/~jgamble/nw.html
195 //
196 // "Best" algorithm in this case is the Bose-Nelson Algorithm.
202}
203
210template<class KeyType>
211KOKKOS_FUNCTION void
212shortSortKeys_4 (KeyType keys[4])
213{
214 // Since this function takes a constant number of entries, I use a
215 // sorting network here. To make the network, I used the generator
216 // at
217 //
218 // http://pages.ripco.net/~jgamble/nw.html
219 //
220 // "Best" algorithm in this case is the Bose-Nelson Algorithm.
226}
227
238template<class KeyType, class ValueType>
239KOKKOS_FUNCTION void
240shortSortKeysAndValues_8 (KeyType keys[8], ValueType values[8])
241{
242 // Since this function takes a constant number of entries, I use a
243 // sorting network here. To make the network, I used the generator
244 // at
245 //
246 // http://pages.ripco.net/~jgamble/nw.html
247 //
248 // "Best" algorithm in this case is the Bose-Nelson Algorithm.
268}
269
276template<class KeyType>
277KOKKOS_FUNCTION void
278shortSortKeys_8 (KeyType keys[8])
279{
280 // Since this function takes a constant number of entries, I use a
281 // sorting network here. To make the network, I used the generator
282 // at
283 //
284 // http://pages.ripco.net/~jgamble/nw.html
285 //
286 // "Best" algorithm in this case is the Bose-Nelson Algorithm.
306}
307
313template<class KeyType, class ValueType, class IndexType>
314KOKKOS_FUNCTION void
316 ValueType values[],
317 const IndexType n)
318{
319 static_assert (std::is_integral<IndexType>::value,
320 "IndexType must be a signed integer type.");
321 static_assert (std::is_signed<IndexType>::value,
322 "IndexType must be a signed integer type. "
323 "This implementation does a count-down loop, "
324 "and may thus loop forever "
325 "if one attempts to use it with unsigned IndexType.");
326 constexpr IndexType ZERO = 0;
327 IndexType midpoint = n / static_cast<IndexType> (2);
328
329 while (midpoint > ZERO) {
330 // Avoid names like "max" in case they collide with macros.
331 const IndexType theMax = n - midpoint;
332 for (IndexType j = 0; j < theMax; ++j) {
333 // IndexType is signed, so it's legit to compare >= 0.
334 for (IndexType k = j; k >= 0; k -= midpoint) {
335 if (keys[k + midpoint] >= keys[k]) {
336 break;
337 }
338 const KeyType tmpKey = keys[k + midpoint];
339 keys[k + midpoint] = keys[k];
340 keys[k] = tmpKey;
341 const ValueType tmpVal = values[k + midpoint];
342 values[k + midpoint] = values[k];
343 values[k] = tmpVal;
344 }
345 }
346 midpoint = midpoint / 2;
347 }
348}
349
354template<class KeyType, class IndexType>
355KOKKOS_FUNCTION void
356shellSortKeys (KeyType keys[], const IndexType n)
357{
358 static_assert (std::is_integral<IndexType>::value,
359 "IndexType must be a signed integer type.");
360 static_assert (std::is_signed<IndexType>::value,
361 "IndexType must be a signed integer type. "
362 "This implementation does a count-down loop, "
363 "and may thus loop forever "
364 "if one attempts to use it with unsigned IndexType.");
365 constexpr IndexType ZERO = 0;
366 IndexType midpoint = n / static_cast<IndexType> (2);
367
368 while (midpoint > ZERO) {
369 // Avoid names like "max" in case they collide with macros.
370 const IndexType theMax = n - midpoint;
371 for (int j = 0; j < theMax; ++j) {
372 // IndexType must be signed, so it's legit to compare >= 0.
373 for (int k = j; k >= 0; k -= midpoint) {
374 if (keys[k + midpoint] >= keys[k]) {
375 break;
376 }
377 const KeyType tmpKey = keys[k + midpoint];
378 keys[k + midpoint] = keys[k];
379 keys[k] = tmpKey;
380 }
381 }
382 midpoint = midpoint / 2;
383 }
384}
385
386template<typename KeyType, typename ValueType, typename IndexType>
387KOKKOS_FUNCTION void
388shellSortKeysAndValues2(KeyType* keys, ValueType* values, IndexType n)
389{
390 IndexType ngaps = 10;
391 //Use Ciura's gap choices
392 IndexType gaps[] = {3548, 1577, 701, 301, 132, 57, 23, 10, 4, 1};
393 for(IndexType gapIndex = 0; gapIndex < ngaps; gapIndex++)
394 {
395 auto gap = gaps[gapIndex];
396 if(n < gap)
397 continue;
398 //insertion sort the array {keys[0*gap], keys[1*gap], keys[2*gap], ...}
399 for(IndexType gapOffset = 0; gapOffset < gap; gapOffset++)
400 {
401 for(IndexType i = gap + gapOffset; i < n; i += gap)
402 {
403 //avoid extra swaps: scan for the final position of keys[i]
404 if(keys[i - gap] > keys[i])
405 {
406 IndexType finalPos = i - gap;
407 while(finalPos - gap >= 0 && keys[finalPos - gap] > keys[i])
408 {
409 finalPos -= gap;
410 }
411 //save keys/values [i], then shift up all keys/values between finalPos and i-gap (inclusive)
412 auto tempKey = keys[i];
413 auto tempVal = values[i];
414 for(IndexType j = i - gap; j >= finalPos; j -= gap)
415 {
416 keys[j + gap] = keys[j];
417 values[j + gap] = values[j];
418 }
419 keys[finalPos] = tempKey;
420 values[finalPos] = tempVal;
421 }
422 }
423 }
424 }
425#undef SHELL_SWAP
426}
427
428} // namespace Details
429} // namespace Tpetra
430
431#endif // TPETRA_DETAILS_SHORTSORT_HPP
#define TPETRA_DETAILS_SWAP_KEYS(i, j)
Macro that swaps the i and j entries of keys, if keys[i] > keys[j] (i.e., if keys[i] and keys[j] are ...
#define TPETRA_DETAILS_SWAP_KEYSANDVALUES(i, j)
Macro that swaps the i and j entries of keys and values, if keys[i] > keys[j] (i.e....
Nonmember function that computes a residual Computes R = B - A * X.
KOKKOS_FUNCTION void shellSortKeys(KeyType keys[], const IndexType n)
Shellsort (yes, it's one word) the input array keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_3(KeyType keys[3], ValueType values[3])
Sort keys and values jointly, by keys, for arrays of length 3.
KOKKOS_FUNCTION void shortSortKeysAndValues_8(KeyType keys[8], ValueType values[8])
Sort keys and values jointly, by keys, for arrays of length 8.
KOKKOS_FUNCTION void shellSortKeysAndValues(KeyType keys[], ValueType values[], const IndexType n)
Shellsort (yes, it's one word) the input array keys, and apply the resulting permutation to the input...
KOKKOS_FUNCTION void shortSortKeys_3(KeyType keys[3])
Sort length-3 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_2(KeyType keys[2], ValueType values[2])
Sort keys and values jointly, by keys, for arrays of length 2.
KOKKOS_FUNCTION void shortSortKeys_2(KeyType keys[2])
Sort length-2 array of keys.
KOKKOS_FUNCTION void shortSortKeys_8(KeyType keys[8])
Sort length-8 array of keys.
KOKKOS_FUNCTION void shortSortKeysAndValues_4(KeyType keys[4], ValueType values[4])
Sort keys and values jointly, by keys, for arrays of length 4.
KOKKOS_FUNCTION void shortSortKeys_4(KeyType keys[4])
Sort length-4 array of keys.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
@ ZERO
Replace old values with zero.