]> git.saurik.com Git - redis.git/blob - src/intset.c
Optimize LRANGE to scan the list starting from the head or the tail in order to trave...
[redis.git] / src / intset.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include "intset.h"
5 #include "zmalloc.h"
6 #include "endian.h"
7
8 /* Note that these encodings are ordered, so:
9 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
10 #define INTSET_ENC_INT16 (sizeof(int16_t))
11 #define INTSET_ENC_INT32 (sizeof(int32_t))
12 #define INTSET_ENC_INT64 (sizeof(int64_t))
13
14 /* Return the required encoding for the provided value. */
15 static uint8_t _intsetValueEncoding(int64_t v) {
16 if (v < INT32_MIN || v > INT32_MAX)
17 return INTSET_ENC_INT64;
18 else if (v < INT16_MIN || v > INT16_MAX)
19 return INTSET_ENC_INT32;
20 else
21 return INTSET_ENC_INT16;
22 }
23
24 /* Return the value at pos, given an encoding. */
25 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
26 int64_t v64;
27 int32_t v32;
28 int16_t v16;
29
30 if (enc == INTSET_ENC_INT64) {
31 memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));
32 memrev64ifbe(&v64);
33 return v64;
34 } else if (enc == INTSET_ENC_INT32) {
35 memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));
36 memrev32ifbe(&v32);
37 return v32;
38 } else {
39 memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));
40 memrev16ifbe(&v16);
41 return v16;
42 }
43 }
44
45 /* Return the value at pos, using the configured encoding. */
46 static int64_t _intsetGet(intset *is, int pos) {
47 return _intsetGetEncoded(is,pos,is->encoding);
48 }
49
50 /* Set the value at pos, using the configured encoding. */
51 static void _intsetSet(intset *is, int pos, int64_t value) {
52 if (is->encoding == INTSET_ENC_INT64) {
53 ((int64_t*)is->contents)[pos] = value;
54 memrev64ifbe(((int64_t*)is->contents)+pos);
55 } else if (is->encoding == INTSET_ENC_INT32) {
56 ((int32_t*)is->contents)[pos] = value;
57 memrev32ifbe(((int32_t*)is->contents)+pos);
58 } else {
59 ((int16_t*)is->contents)[pos] = value;
60 memrev16ifbe(((int16_t*)is->contents)+pos);
61 }
62 }
63
64 /* Create an empty intset. */
65 intset *intsetNew(void) {
66 intset *is = zmalloc(sizeof(intset));
67 is->encoding = INTSET_ENC_INT16;
68 is->length = 0;
69 return is;
70 }
71
72 /* Resize the intset */
73 static intset *intsetResize(intset *is, uint32_t len) {
74 uint32_t size = len*is->encoding;
75 is = zrealloc(is,sizeof(intset)+size);
76 return is;
77 }
78
79 /* Search for the position of "value". Return 1 when the value was found and
80 * sets "pos" to the position of the value within the intset. Return 0 when
81 * the value is not present in the intset and sets "pos" to the position
82 * where "value" can be inserted. */
83 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
84 int min = 0, max = is->length-1, mid = -1;
85 int64_t cur = -1;
86
87 /* The value can never be found when the set is empty */
88 if (is->length == 0) {
89 if (pos) *pos = 0;
90 return 0;
91 } else {
92 /* Check for the case where we know we cannot find the value,
93 * but do know the insert position. */
94 if (value > _intsetGet(is,is->length-1)) {
95 if (pos) *pos = is->length;
96 return 0;
97 } else if (value < _intsetGet(is,0)) {
98 if (pos) *pos = 0;
99 return 0;
100 }
101 }
102
103 while(max >= min) {
104 mid = (min+max)/2;
105 cur = _intsetGet(is,mid);
106 if (value > cur) {
107 min = mid+1;
108 } else if (value < cur) {
109 max = mid-1;
110 } else {
111 break;
112 }
113 }
114
115 if (value == cur) {
116 if (pos) *pos = mid;
117 return 1;
118 } else {
119 if (pos) *pos = min;
120 return 0;
121 }
122 }
123
124 /* Upgrades the intset to a larger encoding and inserts the given integer. */
125 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
126 uint8_t curenc = is->encoding;
127 uint8_t newenc = _intsetValueEncoding(value);
128 int length = is->length;
129 int prepend = value < 0 ? 1 : 0;
130
131 /* First set new encoding and resize */
132 is->encoding = newenc;
133 is = intsetResize(is,is->length+1);
134
135 /* Upgrade back-to-front so we don't overwrite values.
136 * Note that the "prepend" variable is used to make sure we have an empty
137 * space at either the beginning or the end of the intset. */
138 while(length--)
139 _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
140
141 /* Set the value at the beginning or the end. */
142 if (prepend)
143 _intsetSet(is,0,value);
144 else
145 _intsetSet(is,is->length,value);
146 is->length++;
147 return is;
148 }
149
150 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
151 void *src, *dst;
152 uint32_t bytes = is->length-from;
153 if (is->encoding == INTSET_ENC_INT64) {
154 src = (int64_t*)is->contents+from;
155 dst = (int64_t*)is->contents+to;
156 bytes *= sizeof(int64_t);
157 } else if (is->encoding == INTSET_ENC_INT32) {
158 src = (int32_t*)is->contents+from;
159 dst = (int32_t*)is->contents+to;
160 bytes *= sizeof(int32_t);
161 } else {
162 src = (int16_t*)is->contents+from;
163 dst = (int16_t*)is->contents+to;
164 bytes *= sizeof(int16_t);
165 }
166 memmove(dst,src,bytes);
167 }
168
169 /* Insert an integer in the intset */
170 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
171 uint8_t valenc = _intsetValueEncoding(value);
172 uint32_t pos;
173 if (success) *success = 1;
174
175 /* Upgrade encoding if necessary. If we need to upgrade, we know that
176 * this value should be either appended (if > 0) or prepended (if < 0),
177 * because it lies outside the range of existing values. */
178 if (valenc > is->encoding) {
179 /* This always succeeds, so we don't need to curry *success. */
180 return intsetUpgradeAndAdd(is,value);
181 } else {
182 /* Abort if the value is already present in the set.
183 * This call will populate "pos" with the right position to insert
184 * the value when it cannot be found. */
185 if (intsetSearch(is,value,&pos)) {
186 if (success) *success = 0;
187 return is;
188 }
189
190 is = intsetResize(is,is->length+1);
191 if (pos < is->length) intsetMoveTail(is,pos,pos+1);
192 }
193
194 _intsetSet(is,pos,value);
195 is->length++;
196 return is;
197 }
198
199 /* Delete integer from intset */
200 intset *intsetRemove(intset *is, int64_t value, int *success) {
201 uint8_t valenc = _intsetValueEncoding(value);
202 uint32_t pos;
203 if (success) *success = 0;
204
205 if (valenc <= is->encoding && intsetSearch(is,value,&pos)) {
206 /* We know we can delete */
207 if (success) *success = 1;
208
209 /* Overwrite value with tail and update length */
210 if (pos < (is->length-1)) intsetMoveTail(is,pos+1,pos);
211 is = intsetResize(is,is->length-1);
212 is->length--;
213 }
214 return is;
215 }
216
217 /* Determine whether a value belongs to this set */
218 uint8_t intsetFind(intset *is, int64_t value) {
219 uint8_t valenc = _intsetValueEncoding(value);
220 return valenc <= is->encoding && intsetSearch(is,value,NULL);
221 }
222
223 /* Return random member */
224 int64_t intsetRandom(intset *is) {
225 return _intsetGet(is,rand()%is->length);
226 }
227
228 /* Sets the value to the value at the given position. When this position is
229 * out of range the function returns 0, when in range it returns 1. */
230 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
231 if (pos < is->length) {
232 *value = _intsetGet(is,pos);
233 return 1;
234 }
235 return 0;
236 }
237
238 /* Return intset length */
239 uint32_t intsetLen(intset *is) {
240 return is->length;
241 }
242
243 /* Return intset blob size in bytes. */
244 size_t intsetBlobLen(intset *is) {
245 return sizeof(intset)+is->length*is->encoding;
246 }
247
248 #ifdef INTSET_TEST_MAIN
249 #include <sys/time.h>
250
251 void intsetRepr(intset *is) {
252 int i;
253 for (i = 0; i < is->length; i++) {
254 printf("%lld\n", (uint64_t)_intsetGet(is,i));
255 }
256 printf("\n");
257 }
258
259 void error(char *err) {
260 printf("%s\n", err);
261 exit(1);
262 }
263
264 void ok(void) {
265 printf("OK\n");
266 }
267
268 long long usec(void) {
269 struct timeval tv;
270 gettimeofday(&tv,NULL);
271 return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
272 }
273
274 #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
275 void _assert(char *estr, char *file, int line) {
276 printf("\n\n=== ASSERTION FAILED ===\n");
277 printf("==> %s:%d '%s' is not true\n",file,line,estr);
278 }
279
280 intset *createSet(int bits, int size) {
281 uint64_t mask = (1<<bits)-1;
282 uint64_t i, value;
283 intset *is = intsetNew();
284
285 for (i = 0; i < size; i++) {
286 if (bits > 32) {
287 value = (rand()*rand()) & mask;
288 } else {
289 value = rand() & mask;
290 }
291 is = intsetAdd(is,value,NULL);
292 }
293 return is;
294 }
295
296 void checkConsistency(intset *is) {
297 int i;
298
299 for (i = 0; i < (is->length-1); i++) {
300 if (is->encoding == INTSET_ENC_INT16) {
301 int16_t *i16 = (int16_t*)is->contents;
302 assert(i16[i] < i16[i+1]);
303 } else if (is->encoding == INTSET_ENC_INT32) {
304 int32_t *i32 = (int32_t*)is->contents;
305 assert(i32[i] < i32[i+1]);
306 } else {
307 int64_t *i64 = (int64_t*)is->contents;
308 assert(i64[i] < i64[i+1]);
309 }
310 }
311 }
312
313 int main(int argc, char **argv) {
314 uint8_t success;
315 int i;
316 intset *is;
317 sranddev();
318
319 printf("Value encodings: "); {
320 assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
321 assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
322 assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
323 assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
324 assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
325 assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
326 assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
327 assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
328 assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
329 assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
330 ok();
331 }
332
333 printf("Basic adding: "); {
334 is = intsetNew();
335 is = intsetAdd(is,5,&success); assert(success);
336 is = intsetAdd(is,6,&success); assert(success);
337 is = intsetAdd(is,4,&success); assert(success);
338 is = intsetAdd(is,4,&success); assert(!success);
339 ok();
340 }
341
342 printf("Large number of random adds: "); {
343 int inserts = 0;
344 is = intsetNew();
345 for (i = 0; i < 1024; i++) {
346 is = intsetAdd(is,rand()%0x800,&success);
347 if (success) inserts++;
348 }
349 assert(is->length == inserts);
350 checkConsistency(is);
351 ok();
352 }
353
354 printf("Upgrade from int16 to int32: "); {
355 is = intsetNew();
356 is = intsetAdd(is,32,NULL);
357 assert(is->encoding == INTSET_ENC_INT16);
358 is = intsetAdd(is,65535,NULL);
359 assert(is->encoding == INTSET_ENC_INT32);
360 assert(intsetFind(is,32));
361 assert(intsetFind(is,65535));
362 checkConsistency(is);
363
364 is = intsetNew();
365 is = intsetAdd(is,32,NULL);
366 assert(is->encoding == INTSET_ENC_INT16);
367 is = intsetAdd(is,-65535,NULL);
368 assert(is->encoding == INTSET_ENC_INT32);
369 assert(intsetFind(is,32));
370 assert(intsetFind(is,-65535));
371 checkConsistency(is);
372 ok();
373 }
374
375 printf("Upgrade from int16 to int64: "); {
376 is = intsetNew();
377 is = intsetAdd(is,32,NULL);
378 assert(is->encoding == INTSET_ENC_INT16);
379 is = intsetAdd(is,4294967295,NULL);
380 assert(is->encoding == INTSET_ENC_INT64);
381 assert(intsetFind(is,32));
382 assert(intsetFind(is,4294967295));
383 checkConsistency(is);
384
385 is = intsetNew();
386 is = intsetAdd(is,32,NULL);
387 assert(is->encoding == INTSET_ENC_INT16);
388 is = intsetAdd(is,-4294967295,NULL);
389 assert(is->encoding == INTSET_ENC_INT64);
390 assert(intsetFind(is,32));
391 assert(intsetFind(is,-4294967295));
392 checkConsistency(is);
393 ok();
394 }
395
396 printf("Upgrade from int32 to int64: "); {
397 is = intsetNew();
398 is = intsetAdd(is,65535,NULL);
399 assert(is->encoding == INTSET_ENC_INT32);
400 is = intsetAdd(is,4294967295,NULL);
401 assert(is->encoding == INTSET_ENC_INT64);
402 assert(intsetFind(is,65535));
403 assert(intsetFind(is,4294967295));
404 checkConsistency(is);
405
406 is = intsetNew();
407 is = intsetAdd(is,65535,NULL);
408 assert(is->encoding == INTSET_ENC_INT32);
409 is = intsetAdd(is,-4294967295,NULL);
410 assert(is->encoding == INTSET_ENC_INT64);
411 assert(intsetFind(is,65535));
412 assert(intsetFind(is,-4294967295));
413 checkConsistency(is);
414 ok();
415 }
416
417 printf("Stress lookups: "); {
418 long num = 100000, size = 10000;
419 int i, bits = 20;
420 long long start;
421 is = createSet(bits,size);
422 checkConsistency(is);
423
424 start = usec();
425 for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
426 printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
427 }
428
429 printf("Stress add+delete: "); {
430 int i, v1, v2;
431 is = intsetNew();
432 for (i = 0; i < 0xffff; i++) {
433 v1 = rand() % 0xfff;
434 is = intsetAdd(is,v1,NULL);
435 assert(intsetFind(is,v1));
436
437 v2 = rand() % 0xfff;
438 is = intsetRemove(is,v2,NULL);
439 assert(!intsetFind(is,v2));
440 }
441 checkConsistency(is);
442 ok();
443 }
444 }
445 #endif