Hash function switched to murmurhash2.
[redis.git] / src / intset.c
CommitLineData
144b0094
PN
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
144b0094
PN
4#include "intset.h"
5#include "zmalloc.h"
7a3e3720 6#include "endianconv.h"
144b0094
PN
7
8/* Note that these encodings are ordered, so:
9 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
10#define INTSET_ENC_INT16 (sizeof(int16_t))
11#define INTSET_ENC_INT32 (sizeof(int32_t))
12#define INTSET_ENC_INT64 (sizeof(int64_t))
13
76864d56
PN
14/* Return the required encoding for the provided value. */
15static uint8_t _intsetValueEncoding(int64_t v) {
16 if (v < INT32_MIN || v > INT32_MAX)
17 return INTSET_ENC_INT64;
18 else if (v < INT16_MIN || v > INT16_MAX)
19 return INTSET_ENC_INT32;
dc75b1ed 20 else
21 return INTSET_ENC_INT16;
76864d56
PN
22}
23
24/* Return the value at pos, given an encoding. */
25static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
dc75b1ed 26 int64_t v64;
27 int32_t v32;
28 int16_t v16;
29
30 if (enc == INTSET_ENC_INT64) {
31 memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));
32 memrev64ifbe(&v64);
33 return v64;
34 } else if (enc == INTSET_ENC_INT32) {
35 memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));
36 memrev32ifbe(&v32);
37 return v32;
38 } else {
39 memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));
40 memrev16ifbe(&v16);
41 return v16;
42 }
76864d56
PN
43}
44
45/* Return the value at pos, using the configured encoding. */
46static int64_t _intsetGet(intset *is, int pos) {
6136a16b 47 return _intsetGetEncoded(is,pos,intrev32ifbe(is->encoding));
76864d56
PN
48}
49
50/* Set the value at pos, using the configured encoding. */
51static void _intsetSet(intset *is, int pos, int64_t value) {
6136a16b 52 uint32_t encoding = intrev32ifbe(is->encoding);
53
54 if (encoding == INTSET_ENC_INT64) {
76864d56 55 ((int64_t*)is->contents)[pos] = value;
dc75b1ed 56 memrev64ifbe(((int64_t*)is->contents)+pos);
6136a16b 57 } else if (encoding == INTSET_ENC_INT32) {
76864d56 58 ((int32_t*)is->contents)[pos] = value;
dc75b1ed 59 memrev32ifbe(((int32_t*)is->contents)+pos);
60 } else {
76864d56 61 ((int16_t*)is->contents)[pos] = value;
dc75b1ed 62 memrev16ifbe(((int16_t*)is->contents)+pos);
63 }
76864d56 64}
144b0094
PN
65
66/* Create an empty intset. */
67intset *intsetNew(void) {
68 intset *is = zmalloc(sizeof(intset));
6136a16b 69 is->encoding = intrev32ifbe(INTSET_ENC_INT16);
144b0094
PN
70 is->length = 0;
71 return is;
72}
73
74/* Resize the intset */
75static intset *intsetResize(intset *is, uint32_t len) {
6136a16b 76 uint32_t size = len*intrev32ifbe(is->encoding);
144b0094
PN
77 is = zrealloc(is,sizeof(intset)+size);
78 return is;
79}
80
144b0094
PN
81/* Search for the position of "value". Return 1 when the value was found and
82 * sets "pos" to the position of the value within the intset. Return 0 when
83 * the value is not present in the intset and sets "pos" to the position
84 * where "value" can be inserted. */
85static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
6136a16b 86 int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
d0b58d53 87 int64_t cur = -1;
144b0094
PN
88
89 /* The value can never be found when the set is empty */
6136a16b 90 if (intrev32ifbe(is->length) == 0) {
144b0094
PN
91 if (pos) *pos = 0;
92 return 0;
3ab98cef
PN
93 } else {
94 /* Check for the case where we know we cannot find the value,
95 * but do know the insert position. */
6136a16b 96 if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {
97 if (pos) *pos = intrev32ifbe(is->length);
3ab98cef 98 return 0;
76864d56 99 } else if (value < _intsetGet(is,0)) {
3ab98cef
PN
100 if (pos) *pos = 0;
101 return 0;
102 }
144b0094
PN
103 }
104
105 while(max >= min) {
106 mid = (min+max)/2;
76864d56 107 cur = _intsetGet(is,mid);
144b0094
PN
108 if (value > cur) {
109 min = mid+1;
110 } else if (value < cur) {
111 max = mid-1;
112 } else {
113 break;
114 }
115 }
116
117 if (value == cur) {
118 if (pos) *pos = mid;
119 return 1;
120 } else {
121 if (pos) *pos = min;
122 return 0;
123 }
124}
125
f9d5c4e3
PN
126/* Upgrades the intset to a larger encoding and inserts the given integer. */
127static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
6136a16b 128 uint8_t curenc = intrev32ifbe(is->encoding);
f9d5c4e3 129 uint8_t newenc = _intsetValueEncoding(value);
6136a16b 130 int length = intrev32ifbe(is->length);
f9d5c4e3
PN
131 int prepend = value < 0 ? 1 : 0;
132
133 /* First set new encoding and resize */
6136a16b 134 is->encoding = intrev32ifbe(newenc);
135 is = intsetResize(is,intrev32ifbe(is->length)+1);
f9d5c4e3
PN
136
137 /* Upgrade back-to-front so we don't overwrite values.
138 * Note that the "prepend" variable is used to make sure we have an empty
139 * space at either the beginning or the end of the intset. */
140 while(length--)
141 _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
142
143 /* Set the value at the beginning or the end. */
144 if (prepend)
145 _intsetSet(is,0,value);
146 else
6136a16b 147 _intsetSet(is,intrev32ifbe(is->length),value);
148 is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
f9d5c4e3
PN
149 return is;
150}
151
144b0094
PN
152static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
153 void *src, *dst;
6136a16b 154 uint32_t bytes = intrev32ifbe(is->length)-from;
155 uint32_t encoding = intrev32ifbe(is->encoding);
156
157 if (encoding == INTSET_ENC_INT64) {
144b0094
PN
158 src = (int64_t*)is->contents+from;
159 dst = (int64_t*)is->contents+to;
160 bytes *= sizeof(int64_t);
6136a16b 161 } else if (encoding == INTSET_ENC_INT32) {
144b0094
PN
162 src = (int32_t*)is->contents+from;
163 dst = (int32_t*)is->contents+to;
164 bytes *= sizeof(int32_t);
165 } else {
166 src = (int16_t*)is->contents+from;
167 dst = (int16_t*)is->contents+to;
168 bytes *= sizeof(int16_t);
169 }
170 memmove(dst,src,bytes);
171}
172
173/* Insert an integer in the intset */
174intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
76864d56 175 uint8_t valenc = _intsetValueEncoding(value);
740eee1c 176 uint32_t pos;
144b0094
PN
177 if (success) *success = 1;
178
179 /* Upgrade encoding if necessary. If we need to upgrade, we know that
180 * this value should be either appended (if > 0) or prepended (if < 0),
181 * because it lies outside the range of existing values. */
6136a16b 182 if (valenc > intrev32ifbe(is->encoding)) {
f9d5c4e3
PN
183 /* This always succeeds, so we don't need to curry *success. */
184 return intsetUpgradeAndAdd(is,value);
144b0094 185 } else {
3ab98cef
PN
186 /* Abort if the value is already present in the set.
187 * This call will populate "pos" with the right position to insert
188 * the value when it cannot be found. */
189 if (intsetSearch(is,value,&pos)) {
190 if (success) *success = 0;
191 return is;
144b0094
PN
192 }
193
6136a16b 194 is = intsetResize(is,intrev32ifbe(is->length)+1);
195 if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
144b0094
PN
196 }
197
76864d56 198 _intsetSet(is,pos,value);
6136a16b 199 is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
144b0094
PN
200 return is;
201}
202
203/* Delete integer from intset */
a5be65f7 204intset *intsetRemove(intset *is, int64_t value, int *success) {
76864d56 205 uint8_t valenc = _intsetValueEncoding(value);
144b0094
PN
206 uint32_t pos;
207 if (success) *success = 0;
208
6136a16b 209 if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
210 uint32_t len = intrev32ifbe(is->length);
211
144b0094
PN
212 /* We know we can delete */
213 if (success) *success = 1;
214
215 /* Overwrite value with tail and update length */
6136a16b 216 if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
217 is = intsetResize(is,len-1);
218 is->length = intrev32ifbe(len-1);
144b0094
PN
219 }
220 return is;
221}
222
223/* Determine whether a value belongs to this set */
224uint8_t intsetFind(intset *is, int64_t value) {
76864d56 225 uint8_t valenc = _intsetValueEncoding(value);
6136a16b 226 return valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,NULL);
144b0094
PN
227}
228
229/* Return random member */
230int64_t intsetRandom(intset *is) {
6136a16b 231 return _intsetGet(is,rand()%intrev32ifbe(is->length));
144b0094
PN
232}
233
d0b58d53
PN
234/* Sets the value to the value at the given position. When this position is
235 * out of range the function returns 0, when in range it returns 1. */
236uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
6136a16b 237 if (pos < intrev32ifbe(is->length)) {
76864d56 238 *value = _intsetGet(is,pos);
d0b58d53
PN
239 return 1;
240 }
241 return 0;
242}
243
244/* Return intset length */
245uint32_t intsetLen(intset *is) {
6136a16b 246 return intrev32ifbe(is->length);
d0b58d53
PN
247}
248
d4fb9f41 249/* Return intset blob size in bytes. */
250size_t intsetBlobLen(intset *is) {
6136a16b 251 return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
d4fb9f41 252}
253
144b0094
PN
254#ifdef INTSET_TEST_MAIN
255#include <sys/time.h>
256
257void intsetRepr(intset *is) {
258 int i;
6136a16b 259 for (i = 0; i < intrev32ifbe(is->length); i++) {
76864d56 260 printf("%lld\n", (uint64_t)_intsetGet(is,i));
144b0094
PN
261 }
262 printf("\n");
263}
264
265void error(char *err) {
266 printf("%s\n", err);
267 exit(1);
268}
269
270void ok(void) {
271 printf("OK\n");
272}
273
274long long usec(void) {
275 struct timeval tv;
276 gettimeofday(&tv,NULL);
277 return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
278}
279
280#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
281void _assert(char *estr, char *file, int line) {
282 printf("\n\n=== ASSERTION FAILED ===\n");
283 printf("==> %s:%d '%s' is not true\n",file,line,estr);
284}
285
286intset *createSet(int bits, int size) {
287 uint64_t mask = (1<<bits)-1;
288 uint64_t i, value;
289 intset *is = intsetNew();
290
291 for (i = 0; i < size; i++) {
292 if (bits > 32) {
293 value = (rand()*rand()) & mask;
294 } else {
295 value = rand() & mask;
296 }
297 is = intsetAdd(is,value,NULL);
298 }
299 return is;
300}
301
302void checkConsistency(intset *is) {
303 int i;
304
6136a16b 305 for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {
306 uint32_t encoding = intrev32ifbe(is->encoding);
307
308 if (encoding == INTSET_ENC_INT16) {
144b0094
PN
309 int16_t *i16 = (int16_t*)is->contents;
310 assert(i16[i] < i16[i+1]);
6136a16b 311 } else if (encoding == INTSET_ENC_INT32) {
144b0094
PN
312 int32_t *i32 = (int32_t*)is->contents;
313 assert(i32[i] < i32[i+1]);
314 } else {
315 int64_t *i64 = (int64_t*)is->contents;
316 assert(i64[i] < i64[i+1]);
317 }
318 }
319}
320
321int main(int argc, char **argv) {
322 uint8_t success;
323 int i;
324 intset *is;
325 sranddev();
326
327 printf("Value encodings: "); {
76864d56
PN
328 assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
329 assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
330 assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
331 assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
332 assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
333 assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
334 assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
335 assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
336 assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
337 assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
144b0094
PN
338 ok();
339 }
340
341 printf("Basic adding: "); {
342 is = intsetNew();
343 is = intsetAdd(is,5,&success); assert(success);
344 is = intsetAdd(is,6,&success); assert(success);
345 is = intsetAdd(is,4,&success); assert(success);
346 is = intsetAdd(is,4,&success); assert(!success);
347 ok();
348 }
349
350 printf("Large number of random adds: "); {
351 int inserts = 0;
352 is = intsetNew();
353 for (i = 0; i < 1024; i++) {
354 is = intsetAdd(is,rand()%0x800,&success);
355 if (success) inserts++;
356 }
6136a16b 357 assert(intrev32ifbe(is->length) == inserts);
144b0094
PN
358 checkConsistency(is);
359 ok();
360 }
361
362 printf("Upgrade from int16 to int32: "); {
363 is = intsetNew();
364 is = intsetAdd(is,32,NULL);
6136a16b 365 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
144b0094 366 is = intsetAdd(is,65535,NULL);
6136a16b 367 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
144b0094
PN
368 assert(intsetFind(is,32));
369 assert(intsetFind(is,65535));
370 checkConsistency(is);
371
372 is = intsetNew();
373 is = intsetAdd(is,32,NULL);
6136a16b 374 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
144b0094 375 is = intsetAdd(is,-65535,NULL);
6136a16b 376 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
144b0094
PN
377 assert(intsetFind(is,32));
378 assert(intsetFind(is,-65535));
379 checkConsistency(is);
380 ok();
381 }
382
383 printf("Upgrade from int16 to int64: "); {
384 is = intsetNew();
385 is = intsetAdd(is,32,NULL);
6136a16b 386 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
144b0094 387 is = intsetAdd(is,4294967295,NULL);
6136a16b 388 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
144b0094
PN
389 assert(intsetFind(is,32));
390 assert(intsetFind(is,4294967295));
391 checkConsistency(is);
392
393 is = intsetNew();
394 is = intsetAdd(is,32,NULL);
6136a16b 395 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
144b0094 396 is = intsetAdd(is,-4294967295,NULL);
6136a16b 397 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
144b0094
PN
398 assert(intsetFind(is,32));
399 assert(intsetFind(is,-4294967295));
400 checkConsistency(is);
401 ok();
402 }
403
404 printf("Upgrade from int32 to int64: "); {
405 is = intsetNew();
406 is = intsetAdd(is,65535,NULL);
6136a16b 407 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
144b0094 408 is = intsetAdd(is,4294967295,NULL);
6136a16b 409 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
144b0094
PN
410 assert(intsetFind(is,65535));
411 assert(intsetFind(is,4294967295));
412 checkConsistency(is);
413
414 is = intsetNew();
415 is = intsetAdd(is,65535,NULL);
6136a16b 416 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
144b0094 417 is = intsetAdd(is,-4294967295,NULL);
6136a16b 418 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
144b0094
PN
419 assert(intsetFind(is,65535));
420 assert(intsetFind(is,-4294967295));
421 checkConsistency(is);
422 ok();
423 }
424
425 printf("Stress lookups: "); {
426 long num = 100000, size = 10000;
427 int i, bits = 20;
428 long long start;
429 is = createSet(bits,size);
430 checkConsistency(is);
431
432 start = usec();
433 for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
434 printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
435 }
436
437 printf("Stress add+delete: "); {
438 int i, v1, v2;
439 is = intsetNew();
440 for (i = 0; i < 0xffff; i++) {
441 v1 = rand() % 0xfff;
442 is = intsetAdd(is,v1,NULL);
443 assert(intsetFind(is,v1));
444
445 v2 = rand() % 0xfff;
e24d9376 446 is = intsetRemove(is,v2,NULL);
144b0094
PN
447 assert(!intsetFind(is,v2));
448 }
449 checkConsistency(is);
450 ok();
451 }
452}
453#endif