Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
MurmurHash3.cpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10//-----------------------------------------------------------------------------
11// MurmurHash3 was written by Austin Appleby, and is placed in the public
12// domain. The author hereby disclaims copyright to this source code.
13
14// Note - The x86 and x64 versions do _not_ produce the same results, as the
15// algorithms are optimized for their respective platforms. You can still
16// compile and run any of them on any platform, but your performance with the
17// non-native version will be less than optimal.
18
#include "MurmurHash3.hpp"

#include <cstring>
20
21//-----------------------------------------------------------------------------
22// Platform-specific functions and macros
23
24// Microsoft Visual Studio
#if defined(_MSC_VER)

// With Microsoft Visual Studio, rotations map onto the compiler's
// _rotl / _rotl64 functions declared in <stdlib.h>.

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // not defined(_MSC_VER)

namespace { // anonymous

// Rotate the 32-bit value x left by r bit positions.
//
// The rotation count is reduced modulo 32 before shifting, so that
// r == 0 (or any other count outside 1..31) never turns into a shift
// by the full width of the type, which C++ leaves undefined.
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned count = static_cast<unsigned> (r) & 31u;
  return (x << count) | (x >> ((32u - count) & 31u));
}

// Rotate the 64-bit value x left by r bit positions.
//
// The rotation count is reduced modulo 64 for the same reason as in
// rotl32 above.
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned count = static_cast<unsigned> (r) & 63u;
  return (x << count) | (x >> ((64u - count) & 63u));
}

} // namespace (anonymous)

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

// Append the unsigned long long suffix to a 64-bit integer literal.
#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)
60
61//-----------------------------------------------------------------------------
62// Block read - if your platform needs to do endian-swapping or can only
63// handle aligned reads, do the conversion here
64
// GETBLOCK(lhs, p, i): assign element i of the array pointed to by p
// to lhs.  The index i may be negative.  No endian conversion or
// alignment fix-up is performed here.
//
// The body is wrapped in do { } while (0) and every argument is
// parenthesized, so that the macro behaves like a single statement
// (including inside an unbraced if / else) and accepts arbitrary
// expressions for p and lhs.  Invoke it with a trailing semicolon.
#define GETBLOCK(lhs, p, i ) \
  do { \
    (lhs) = (p)[(i)]; \
  } while (0)
69
70
71//-----------------------------------------------------------------------------
72// Finalization mix - force all bits of a hash block to avalanche
73
// FMIX_32(h): 32-bit finalization mix.  Replaces the 32-bit value h, in
// place, by its mixed value (alternating xor-shifts and multiplications).
//
// The body is wrapped in do { } while (0) so that the macro acts as a
// single statement, also inside an unbraced if / else.  Invoke it with a
// trailing semicolon.  h is evaluated twice (once read, once written).
#define FMIX_32( h ) \
  do { \
    uint32_t t_h = (h); \
    t_h ^= t_h >> 16; \
    t_h *= 0x85ebca6b; \
    t_h ^= t_h >> 13; \
    t_h *= 0xc2b2ae35; \
    t_h ^= t_h >> 16; \
    (h) = t_h; \
  } while (0)
84
85//----------
86
// FMIX_64(k): 64-bit finalization mix.  Replaces the 64-bit value k, in
// place, by its mixed value (alternating xor-shifts and multiplications).
//
// The body is wrapped in do { } while (0) so that the macro acts as a
// single statement, also inside an unbraced if / else.  Invoke it with a
// trailing semicolon.  k is evaluated twice (once read, once written).
// The 64-bit multipliers are spelled with the standard ULL suffix, so
// the macro does not depend on any other macro being defined first.
#define FMIX_64( k ) \
  do { \
    uint64_t t_k = (k); \
    t_k ^= t_k >> 33; \
    t_k *= 0xff51afd7ed558ccdULL; \
    t_k ^= t_k >> 33; \
    t_k *= 0xc4ceb9fe1a85ec53ULL; \
    t_k ^= t_k >> 33; \
    (k) = t_k; \
  } while (0)
97
98//-----------------------------------------------------------------------------
99
namespace Tpetra {
namespace Details {

namespace { // anonymous: helpers private to this translation unit

// Rotate the 32-bit value x left by r bits (r is reduced modulo 32, so
// no shift by the full width of the type can occur).
inline uint32_t murmurRotl32 (const uint32_t x, const unsigned r)
{
  return (x << (r & 31u)) | (x >> ((32u - r) & 31u));
}

// Rotate the 64-bit value x left by r bits (r is reduced modulo 64).
inline uint64_t murmurRotl64 (const uint64_t x, const unsigned r)
{
  return (x << (r & 63u)) | (x >> ((64u - r) & 63u));
}

// Read 4 bytes starting at p as a uint32_t in the platform's native
// byte order.  std::memcpy is used instead of a pointer cast so that p
// need not be aligned for uint32_t and no aliasing rule is violated.
inline uint32_t murmurLoad32 (const uint8_t* const p)
{
  uint32_t value;
  std::memcpy (&value, p, sizeof (value));
  return value;
}

// Read 8 bytes starting at p as a uint64_t in the platform's native
// byte order; see murmurLoad32 for why std::memcpy is used.
inline uint64_t murmurLoad64 (const uint8_t* const p)
{
  uint64_t value;
  std::memcpy (&value, p, sizeof (value));
  return value;
}

// Pre-mix one 32-bit input word: multiply, rotate left, multiply.
inline uint32_t
murmurScramble32 (uint32_t k, const uint32_t mulBefore,
                  const unsigned rot, const uint32_t mulAfter)
{
  k *= mulBefore;
  k = murmurRotl32 (k, rot);
  k *= mulAfter;
  return k;
}

// Pre-mix one 64-bit input word: multiply, rotate left, multiply.
inline uint64_t
murmurScramble64 (uint64_t k, const uint64_t mulBefore,
                  const unsigned rot, const uint64_t mulAfter)
{
  k *= mulBefore;
  k = murmurRotl64 (k, rot);
  k *= mulAfter;
  return k;
}

// Finalization mix for a 32-bit hash word.
inline uint32_t murmurFmix32 (uint32_t h)
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;
  return h;
}

// Finalization mix for a 64-bit hash word.
inline uint64_t murmurFmix64 (uint64_t k)
{
  k ^= k >> 33;
  k *= 0xff51afd7ed558ccdULL;
  k ^= k >> 33;
  k *= 0xc4ceb9fe1a85ec53ULL;
  k ^= k >> 33;
  return k;
}

} // namespace (anonymous)

//-----------------------------------------------------------------------------

// MurmurHash3, x86 variant with a 32-bit result.
//
// key:  pointer to the first of len bytes to hash; no particular
//       alignment is required.
// len:  number of bytes to hash; must be nonnegative.
// seed: initial value of the hash state.
// out:  receives the 4-byte result; no particular alignment is required.
//
// Whole 4-byte blocks are read in the platform's native byte order.
void MurmurHash3_x86_32 ( const void * key, int len,
                          uint32_t seed, void * out )
{
  const uint8_t * const data = static_cast<const uint8_t*> (key);
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  //----------
  // body: consume the input one 4-byte block at a time

  for (int i = 0; i < nblocks; ++i) {
    const uint32_t k1 = murmurLoad32 (data + i * 4);

    h1 ^= murmurScramble32 (k1, c1, 15, c2);
    h1 = murmurRotl32 (h1, 13);
    h1 = h1 * 5 + 0xe6546b64;
  }

  //----------
  // tail: the 0 to 3 bytes left over after the last whole block.
  // Byte j of the tail occupies bits [8*j, 8*j + 8) of k1.

  const uint8_t * const tail = data + nblocks * 4;
  const int rem = len & 3;

  if (rem != 0) {
    uint32_t k1 = 0;
    for (int j = 0; j < rem; ++j) {
      k1 |= static_cast<uint32_t> (tail[j]) << (8 * j);
    }
    h1 ^= murmurScramble32 (k1, c1, 15, c2);
  }

  //----------
  // finalization

  h1 ^= static_cast<uint32_t> (len);
  h1 = murmurFmix32 (h1);

  std::memcpy (out, &h1, sizeof (h1));
}

//-----------------------------------------------------------------------------

// MurmurHash3, x86 variant with a 128-bit result.
//
// key:  pointer to the first of len bytes to hash; no particular
//       alignment is required.
// len:  number of bytes to hash; must be nonnegative.
// seed: initial value of each of the four 32-bit hash state words.
// out:  receives the 16-byte result (four consecutive uint32_t values);
//       no particular alignment is required.
//
// Whole 16-byte blocks are read as four 32-bit words in the platform's
// native byte order.
void MurmurHash3_x86_128 ( const void * key, const int len,
                           uint32_t seed, void * out )
{
  const uint8_t * const data = static_cast<const uint8_t*> (key);
  const int nblocks = len / 16;

  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  const uint32_t c1 = 0x239b961b;
  const uint32_t c2 = 0xab0e9789;
  const uint32_t c3 = 0x38b34ae5;
  const uint32_t c4 = 0xa1e38b93;

  //----------
  // body: consume the input one 16-byte block at a time

  for (int i = 0; i < nblocks; ++i) {
    const uint8_t * const block = data + i * 16;
    const uint32_t k1 = murmurLoad32 (block);
    const uint32_t k2 = murmurLoad32 (block + 4);
    const uint32_t k3 = murmurLoad32 (block + 8);
    const uint32_t k4 = murmurLoad32 (block + 12);

    h1 ^= murmurScramble32 (k1, c1, 15, c2);
    h1 = murmurRotl32 (h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b;

    h2 ^= murmurScramble32 (k2, c2, 16, c3);
    h2 = murmurRotl32 (h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747;

    h3 ^= murmurScramble32 (k3, c3, 17, c4);
    h3 = murmurRotl32 (h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35;

    h4 ^= murmurScramble32 (k4, c4, 18, c1);
    h4 = murmurRotl32 (h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17;
  }

  //----------
  // tail: the 0 to 15 bytes left over after the last whole block.
  // Byte j of the tail occupies bits [8*(j%4), 8*(j%4) + 8) of word j/4.
  // A word is mixed into its hash state only if it received at least
  // one byte; the four mixes touch different state words, so their
  // relative order does not matter.

  const uint8_t * const tail = data + nblocks * 16;
  const int rem = len & 15;

  uint32_t k[4] = {0, 0, 0, 0};
  for (int j = 0; j < rem; ++j) {
    k[j >> 2] |= static_cast<uint32_t> (tail[j]) << (8 * (j & 3));
  }

  if (rem > 12) {
    h4 ^= murmurScramble32 (k[3], c4, 18, c1);
  }
  if (rem > 8) {
    h3 ^= murmurScramble32 (k[2], c3, 17, c4);
  }
  if (rem > 4) {
    h2 ^= murmurScramble32 (k[1], c2, 16, c3);
  }
  if (rem > 0) {
    h1 ^= murmurScramble32 (k[0], c1, 15, c2);
  }

  //----------
  // finalization

  const uint32_t len32 = static_cast<uint32_t> (len);
  h1 ^= len32; h2 ^= len32; h3 ^= len32; h4 ^= len32;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = murmurFmix32 (h1);
  h2 = murmurFmix32 (h2);
  h3 = murmurFmix32 (h3);
  h4 = murmurFmix32 (h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  const uint32_t result[4] = {h1, h2, h3, h4};
  std::memcpy (out, result, sizeof (result));
}

//-----------------------------------------------------------------------------

// MurmurHash3, x64 variant with a 128-bit result.
//
// key:  pointer to the first of len bytes to hash; no particular
//       alignment is required.
// len:  number of bytes to hash; must be nonnegative.
// seed: initial value of each of the two 64-bit hash state words.
// out:  receives the 16-byte result (two consecutive uint64_t values);
//       no particular alignment is required.
//
// Whole 16-byte blocks are read as two 64-bit words in the platform's
// native byte order.
void MurmurHash3_x64_128 ( const void * key, const int len,
                           const uint32_t seed, void * out )
{
  const uint8_t * const data = static_cast<const uint8_t*> (key);
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = 0x87c37b91114253d5ULL;
  const uint64_t c2 = 0x4cf5ad432745937fULL;

  //----------
  // body: consume the input one 16-byte block at a time

  for (int i = 0; i < nblocks; ++i) {
    const uint8_t * const block = data + i * 16;
    const uint64_t k1 = murmurLoad64 (block);
    const uint64_t k2 = murmurLoad64 (block + 8);

    h1 ^= murmurScramble64 (k1, c1, 31, c2);
    h1 = murmurRotl64 (h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729;

    h2 ^= murmurScramble64 (k2, c2, 33, c1);
    h2 = murmurRotl64 (h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5;
  }

  //----------
  // tail: the 0 to 15 bytes left over after the last whole block.
  // Bytes 0..7 fill k1 and bytes 8..14 fill k2, lowest byte first.
  // A word is mixed into its hash state only if it received at least
  // one byte.

  const uint8_t * const tail = data + nblocks * 16;
  const int rem = len & 15;

  uint64_t k1 = 0;
  uint64_t k2 = 0;
  for (int j = 0; j < rem; ++j) {
    const uint64_t byte = tail[j];
    if (j < 8) {
      k1 |= byte << (8 * j);
    }
    else {
      k2 |= byte << (8 * (j - 8));
    }
  }

  if (rem > 8) {
    h2 ^= murmurScramble64 (k2, c2, 33, c1);
  }
  if (rem > 0) {
    h1 ^= murmurScramble64 (k1, c1, 31, c2);
  }

  //----------
  // finalization

  const uint64_t len64 = static_cast<uint64_t> (len);
  h1 ^= len64; h2 ^= len64;

  h1 += h2;
  h2 += h1;

  h1 = murmurFmix64 (h1);
  h2 = murmurFmix64 (h2);

  h1 += h2;
  h2 += h1;

  const uint64_t result[2] = {h1, h2};
  std::memcpy (out, result, sizeof (result));
}

} // namespace Details
} // namespace Tpetra
348
349//-----------------------------------------------------------------------------
350
Nonmember function that computes a residual: R = B - A * X.
Namespace Tpetra contains the class and methods constituting the Tpetra library.