]> git.saurik.com Git - redis.git/blob - src/intset.c
Hash function switched to murmurhash2.
[redis.git] / src / intset.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include "intset.h"
5 #include "zmalloc.h"
6 #include "endianconv.h"
7
8 /* Note that these encodings are ordered, so:
9 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
10 #define INTSET_ENC_INT16 (sizeof(int16_t))
11 #define INTSET_ENC_INT32 (sizeof(int32_t))
12 #define INTSET_ENC_INT64 (sizeof(int64_t))
13
14 /* Return the required encoding for the provided value. */
15 static uint8_t _intsetValueEncoding(int64_t v) {
16 if (v < INT32_MIN || v > INT32_MAX)
17 return INTSET_ENC_INT64;
18 else if (v < INT16_MIN || v > INT16_MAX)
19 return INTSET_ENC_INT32;
20 else
21 return INTSET_ENC_INT16;
22 }
23
24 /* Return the value at pos, given an encoding. */
25 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
26 int64_t v64;
27 int32_t v32;
28 int16_t v16;
29
30 if (enc == INTSET_ENC_INT64) {
31 memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));
32 memrev64ifbe(&v64);
33 return v64;
34 } else if (enc == INTSET_ENC_INT32) {
35 memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));
36 memrev32ifbe(&v32);
37 return v32;
38 } else {
39 memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));
40 memrev16ifbe(&v16);
41 return v16;
42 }
43 }
44
45 /* Return the value at pos, using the configured encoding. */
46 static int64_t _intsetGet(intset *is, int pos) {
47 return _intsetGetEncoded(is,pos,intrev32ifbe(is->encoding));
48 }
49
50 /* Set the value at pos, using the configured encoding. */
51 static void _intsetSet(intset *is, int pos, int64_t value) {
52 uint32_t encoding = intrev32ifbe(is->encoding);
53
54 if (encoding == INTSET_ENC_INT64) {
55 ((int64_t*)is->contents)[pos] = value;
56 memrev64ifbe(((int64_t*)is->contents)+pos);
57 } else if (encoding == INTSET_ENC_INT32) {
58 ((int32_t*)is->contents)[pos] = value;
59 memrev32ifbe(((int32_t*)is->contents)+pos);
60 } else {
61 ((int16_t*)is->contents)[pos] = value;
62 memrev16ifbe(((int16_t*)is->contents)+pos);
63 }
64 }
65
66 /* Create an empty intset. */
67 intset *intsetNew(void) {
68 intset *is = zmalloc(sizeof(intset));
69 is->encoding = intrev32ifbe(INTSET_ENC_INT16);
70 is->length = 0;
71 return is;
72 }
73
74 /* Resize the intset */
75 static intset *intsetResize(intset *is, uint32_t len) {
76 uint32_t size = len*intrev32ifbe(is->encoding);
77 is = zrealloc(is,sizeof(intset)+size);
78 return is;
79 }
80
81 /* Search for the position of "value". Return 1 when the value was found and
82 * sets "pos" to the position of the value within the intset. Return 0 when
83 * the value is not present in the intset and sets "pos" to the position
84 * where "value" can be inserted. */
85 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
86 int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
87 int64_t cur = -1;
88
89 /* The value can never be found when the set is empty */
90 if (intrev32ifbe(is->length) == 0) {
91 if (pos) *pos = 0;
92 return 0;
93 } else {
94 /* Check for the case where we know we cannot find the value,
95 * but do know the insert position. */
96 if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {
97 if (pos) *pos = intrev32ifbe(is->length);
98 return 0;
99 } else if (value < _intsetGet(is,0)) {
100 if (pos) *pos = 0;
101 return 0;
102 }
103 }
104
105 while(max >= min) {
106 mid = (min+max)/2;
107 cur = _intsetGet(is,mid);
108 if (value > cur) {
109 min = mid+1;
110 } else if (value < cur) {
111 max = mid-1;
112 } else {
113 break;
114 }
115 }
116
117 if (value == cur) {
118 if (pos) *pos = mid;
119 return 1;
120 } else {
121 if (pos) *pos = min;
122 return 0;
123 }
124 }
125
126 /* Upgrades the intset to a larger encoding and inserts the given integer. */
127 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
128 uint8_t curenc = intrev32ifbe(is->encoding);
129 uint8_t newenc = _intsetValueEncoding(value);
130 int length = intrev32ifbe(is->length);
131 int prepend = value < 0 ? 1 : 0;
132
133 /* First set new encoding and resize */
134 is->encoding = intrev32ifbe(newenc);
135 is = intsetResize(is,intrev32ifbe(is->length)+1);
136
137 /* Upgrade back-to-front so we don't overwrite values.
138 * Note that the "prepend" variable is used to make sure we have an empty
139 * space at either the beginning or the end of the intset. */
140 while(length--)
141 _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
142
143 /* Set the value at the beginning or the end. */
144 if (prepend)
145 _intsetSet(is,0,value);
146 else
147 _intsetSet(is,intrev32ifbe(is->length),value);
148 is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
149 return is;
150 }
151
152 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
153 void *src, *dst;
154 uint32_t bytes = intrev32ifbe(is->length)-from;
155 uint32_t encoding = intrev32ifbe(is->encoding);
156
157 if (encoding == INTSET_ENC_INT64) {
158 src = (int64_t*)is->contents+from;
159 dst = (int64_t*)is->contents+to;
160 bytes *= sizeof(int64_t);
161 } else if (encoding == INTSET_ENC_INT32) {
162 src = (int32_t*)is->contents+from;
163 dst = (int32_t*)is->contents+to;
164 bytes *= sizeof(int32_t);
165 } else {
166 src = (int16_t*)is->contents+from;
167 dst = (int16_t*)is->contents+to;
168 bytes *= sizeof(int16_t);
169 }
170 memmove(dst,src,bytes);
171 }
172
173 /* Insert an integer in the intset */
174 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
175 uint8_t valenc = _intsetValueEncoding(value);
176 uint32_t pos;
177 if (success) *success = 1;
178
179 /* Upgrade encoding if necessary. If we need to upgrade, we know that
180 * this value should be either appended (if > 0) or prepended (if < 0),
181 * because it lies outside the range of existing values. */
182 if (valenc > intrev32ifbe(is->encoding)) {
183 /* This always succeeds, so we don't need to curry *success. */
184 return intsetUpgradeAndAdd(is,value);
185 } else {
186 /* Abort if the value is already present in the set.
187 * This call will populate "pos" with the right position to insert
188 * the value when it cannot be found. */
189 if (intsetSearch(is,value,&pos)) {
190 if (success) *success = 0;
191 return is;
192 }
193
194 is = intsetResize(is,intrev32ifbe(is->length)+1);
195 if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
196 }
197
198 _intsetSet(is,pos,value);
199 is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
200 return is;
201 }
202
203 /* Delete integer from intset */
204 intset *intsetRemove(intset *is, int64_t value, int *success) {
205 uint8_t valenc = _intsetValueEncoding(value);
206 uint32_t pos;
207 if (success) *success = 0;
208
209 if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
210 uint32_t len = intrev32ifbe(is->length);
211
212 /* We know we can delete */
213 if (success) *success = 1;
214
215 /* Overwrite value with tail and update length */
216 if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
217 is = intsetResize(is,len-1);
218 is->length = intrev32ifbe(len-1);
219 }
220 return is;
221 }
222
223 /* Determine whether a value belongs to this set */
224 uint8_t intsetFind(intset *is, int64_t value) {
225 uint8_t valenc = _intsetValueEncoding(value);
226 return valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,NULL);
227 }
228
229 /* Return random member */
230 int64_t intsetRandom(intset *is) {
231 return _intsetGet(is,rand()%intrev32ifbe(is->length));
232 }
233
234 /* Sets the value to the value at the given position. When this position is
235 * out of range the function returns 0, when in range it returns 1. */
236 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
237 if (pos < intrev32ifbe(is->length)) {
238 *value = _intsetGet(is,pos);
239 return 1;
240 }
241 return 0;
242 }
243
244 /* Return intset length */
245 uint32_t intsetLen(intset *is) {
246 return intrev32ifbe(is->length);
247 }
248
249 /* Return intset blob size in bytes. */
250 size_t intsetBlobLen(intset *is) {
251 return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
252 }
253
254 #ifdef INTSET_TEST_MAIN
255 #include <sys/time.h>
256
257 void intsetRepr(intset *is) {
258 int i;
259 for (i = 0; i < intrev32ifbe(is->length); i++) {
260 printf("%lld\n", (uint64_t)_intsetGet(is,i));
261 }
262 printf("\n");
263 }
264
265 void error(char *err) {
266 printf("%s\n", err);
267 exit(1);
268 }
269
270 void ok(void) {
271 printf("OK\n");
272 }
273
274 long long usec(void) {
275 struct timeval tv;
276 gettimeofday(&tv,NULL);
277 return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
278 }
279
280 #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
281 void _assert(char *estr, char *file, int line) {
282 printf("\n\n=== ASSERTION FAILED ===\n");
283 printf("==> %s:%d '%s' is not true\n",file,line,estr);
284 }
285
286 intset *createSet(int bits, int size) {
287 uint64_t mask = (1<<bits)-1;
288 uint64_t i, value;
289 intset *is = intsetNew();
290
291 for (i = 0; i < size; i++) {
292 if (bits > 32) {
293 value = (rand()*rand()) & mask;
294 } else {
295 value = rand() & mask;
296 }
297 is = intsetAdd(is,value,NULL);
298 }
299 return is;
300 }
301
302 void checkConsistency(intset *is) {
303 int i;
304
305 for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {
306 uint32_t encoding = intrev32ifbe(is->encoding);
307
308 if (encoding == INTSET_ENC_INT16) {
309 int16_t *i16 = (int16_t*)is->contents;
310 assert(i16[i] < i16[i+1]);
311 } else if (encoding == INTSET_ENC_INT32) {
312 int32_t *i32 = (int32_t*)is->contents;
313 assert(i32[i] < i32[i+1]);
314 } else {
315 int64_t *i64 = (int64_t*)is->contents;
316 assert(i64[i] < i64[i+1]);
317 }
318 }
319 }
320
321 int main(int argc, char **argv) {
322 uint8_t success;
323 int i;
324 intset *is;
325 sranddev();
326
327 printf("Value encodings: "); {
328 assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
329 assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
330 assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
331 assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
332 assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
333 assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
334 assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
335 assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
336 assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
337 assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
338 ok();
339 }
340
341 printf("Basic adding: "); {
342 is = intsetNew();
343 is = intsetAdd(is,5,&success); assert(success);
344 is = intsetAdd(is,6,&success); assert(success);
345 is = intsetAdd(is,4,&success); assert(success);
346 is = intsetAdd(is,4,&success); assert(!success);
347 ok();
348 }
349
350 printf("Large number of random adds: "); {
351 int inserts = 0;
352 is = intsetNew();
353 for (i = 0; i < 1024; i++) {
354 is = intsetAdd(is,rand()%0x800,&success);
355 if (success) inserts++;
356 }
357 assert(intrev32ifbe(is->length) == inserts);
358 checkConsistency(is);
359 ok();
360 }
361
362 printf("Upgrade from int16 to int32: "); {
363 is = intsetNew();
364 is = intsetAdd(is,32,NULL);
365 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
366 is = intsetAdd(is,65535,NULL);
367 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
368 assert(intsetFind(is,32));
369 assert(intsetFind(is,65535));
370 checkConsistency(is);
371
372 is = intsetNew();
373 is = intsetAdd(is,32,NULL);
374 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
375 is = intsetAdd(is,-65535,NULL);
376 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
377 assert(intsetFind(is,32));
378 assert(intsetFind(is,-65535));
379 checkConsistency(is);
380 ok();
381 }
382
383 printf("Upgrade from int16 to int64: "); {
384 is = intsetNew();
385 is = intsetAdd(is,32,NULL);
386 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
387 is = intsetAdd(is,4294967295,NULL);
388 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
389 assert(intsetFind(is,32));
390 assert(intsetFind(is,4294967295));
391 checkConsistency(is);
392
393 is = intsetNew();
394 is = intsetAdd(is,32,NULL);
395 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
396 is = intsetAdd(is,-4294967295,NULL);
397 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
398 assert(intsetFind(is,32));
399 assert(intsetFind(is,-4294967295));
400 checkConsistency(is);
401 ok();
402 }
403
404 printf("Upgrade from int32 to int64: "); {
405 is = intsetNew();
406 is = intsetAdd(is,65535,NULL);
407 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
408 is = intsetAdd(is,4294967295,NULL);
409 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
410 assert(intsetFind(is,65535));
411 assert(intsetFind(is,4294967295));
412 checkConsistency(is);
413
414 is = intsetNew();
415 is = intsetAdd(is,65535,NULL);
416 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
417 is = intsetAdd(is,-4294967295,NULL);
418 assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
419 assert(intsetFind(is,65535));
420 assert(intsetFind(is,-4294967295));
421 checkConsistency(is);
422 ok();
423 }
424
425 printf("Stress lookups: "); {
426 long num = 100000, size = 10000;
427 int i, bits = 20;
428 long long start;
429 is = createSet(bits,size);
430 checkConsistency(is);
431
432 start = usec();
433 for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
434 printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
435 }
436
437 printf("Stress add+delete: "); {
438 int i, v1, v2;
439 is = intsetNew();
440 for (i = 0; i < 0xffff; i++) {
441 v1 = rand() % 0xfff;
442 is = intsetAdd(is,v1,NULL);
443 assert(intsetFind(is,v1));
444
445 v2 = rand() % 0xfff;
446 is = intsetRemove(is,v2,NULL);
447 assert(!intsetFind(is,v2));
448 }
449 checkConsistency(is);
450 ok();
451 }
452 }
453 #endif