/* git.saurik.com Git - redis.git/blob - src/intset.c
 * Support dual encoding for ZRANGEBYSCORE et al
 * [redis.git] / src / intset.c */
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include "intset.h"
5 #include "zmalloc.h"
6
/* Each encoding constant is the byte width of the integer type it names,
 * which is why the constants are ordered:
 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
#define INTSET_ENC_INT16 (sizeof(int16_t))
#define INTSET_ENC_INT32 (sizeof(int32_t))
#define INTSET_ENC_INT64 (sizeof(int64_t))

/* Return the narrowest encoding whose integer type can represent "v". */
static uint8_t _intsetValueEncoding(int64_t v) {
    if (v >= INT16_MIN && v <= INT16_MAX) {
        return INTSET_ENC_INT16;
    }
    if (v >= INT32_MIN && v <= INT32_MAX) {
        return INTSET_ENC_INT32;
    }
    return INTSET_ENC_INT64;
}
21
22 /* Return the value at pos, given an encoding. */
23 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
24 if (enc == INTSET_ENC_INT64)
25 return ((int64_t*)is->contents)[pos];
26 else if (enc == INTSET_ENC_INT32)
27 return ((int32_t*)is->contents)[pos];
28 return ((int16_t*)is->contents)[pos];
29 }
30
31 /* Return the value at pos, using the configured encoding. */
32 static int64_t _intsetGet(intset *is, int pos) {
33 return _intsetGetEncoded(is,pos,is->encoding);
34 }
35
36 /* Set the value at pos, using the configured encoding. */
37 static void _intsetSet(intset *is, int pos, int64_t value) {
38 if (is->encoding == INTSET_ENC_INT64)
39 ((int64_t*)is->contents)[pos] = value;
40 else if (is->encoding == INTSET_ENC_INT32)
41 ((int32_t*)is->contents)[pos] = value;
42 else
43 ((int16_t*)is->contents)[pos] = value;
44 }
45
46 /* Create an empty intset. */
47 intset *intsetNew(void) {
48 intset *is = zmalloc(sizeof(intset));
49 is->encoding = INTSET_ENC_INT16;
50 is->length = 0;
51 return is;
52 }
53
54 /* Resize the intset */
55 static intset *intsetResize(intset *is, uint32_t len) {
56 uint32_t size = len*is->encoding;
57 is = zrealloc(is,sizeof(intset)+size);
58 return is;
59 }
60
61 /* Search for the position of "value". Return 1 when the value was found and
62 * sets "pos" to the position of the value within the intset. Return 0 when
63 * the value is not present in the intset and sets "pos" to the position
64 * where "value" can be inserted. */
65 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
66 int min = 0, max = is->length-1, mid = -1;
67 int64_t cur = -1;
68
69 /* The value can never be found when the set is empty */
70 if (is->length == 0) {
71 if (pos) *pos = 0;
72 return 0;
73 } else {
74 /* Check for the case where we know we cannot find the value,
75 * but do know the insert position. */
76 if (value > _intsetGet(is,is->length-1)) {
77 if (pos) *pos = is->length;
78 return 0;
79 } else if (value < _intsetGet(is,0)) {
80 if (pos) *pos = 0;
81 return 0;
82 }
83 }
84
85 while(max >= min) {
86 mid = (min+max)/2;
87 cur = _intsetGet(is,mid);
88 if (value > cur) {
89 min = mid+1;
90 } else if (value < cur) {
91 max = mid-1;
92 } else {
93 break;
94 }
95 }
96
97 if (value == cur) {
98 if (pos) *pos = mid;
99 return 1;
100 } else {
101 if (pos) *pos = min;
102 return 0;
103 }
104 }
105
106 /* Upgrades the intset to a larger encoding and inserts the given integer. */
107 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
108 uint8_t curenc = is->encoding;
109 uint8_t newenc = _intsetValueEncoding(value);
110 int length = is->length;
111 int prepend = value < 0 ? 1 : 0;
112
113 /* First set new encoding and resize */
114 is->encoding = newenc;
115 is = intsetResize(is,is->length+1);
116
117 /* Upgrade back-to-front so we don't overwrite values.
118 * Note that the "prepend" variable is used to make sure we have an empty
119 * space at either the beginning or the end of the intset. */
120 while(length--)
121 _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
122
123 /* Set the value at the beginning or the end. */
124 if (prepend)
125 _intsetSet(is,0,value);
126 else
127 _intsetSet(is,is->length,value);
128 is->length++;
129 return is;
130 }
131
132 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
133 void *src, *dst;
134 uint32_t bytes = is->length-from;
135 if (is->encoding == INTSET_ENC_INT64) {
136 src = (int64_t*)is->contents+from;
137 dst = (int64_t*)is->contents+to;
138 bytes *= sizeof(int64_t);
139 } else if (is->encoding == INTSET_ENC_INT32) {
140 src = (int32_t*)is->contents+from;
141 dst = (int32_t*)is->contents+to;
142 bytes *= sizeof(int32_t);
143 } else {
144 src = (int16_t*)is->contents+from;
145 dst = (int16_t*)is->contents+to;
146 bytes *= sizeof(int16_t);
147 }
148 memmove(dst,src,bytes);
149 }
150
151 /* Insert an integer in the intset */
152 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
153 uint8_t valenc = _intsetValueEncoding(value);
154 uint32_t pos;
155 if (success) *success = 1;
156
157 /* Upgrade encoding if necessary. If we need to upgrade, we know that
158 * this value should be either appended (if > 0) or prepended (if < 0),
159 * because it lies outside the range of existing values. */
160 if (valenc > is->encoding) {
161 /* This always succeeds, so we don't need to curry *success. */
162 return intsetUpgradeAndAdd(is,value);
163 } else {
164 /* Abort if the value is already present in the set.
165 * This call will populate "pos" with the right position to insert
166 * the value when it cannot be found. */
167 if (intsetSearch(is,value,&pos)) {
168 if (success) *success = 0;
169 return is;
170 }
171
172 is = intsetResize(is,is->length+1);
173 if (pos < is->length) intsetMoveTail(is,pos,pos+1);
174 }
175
176 _intsetSet(is,pos,value);
177 is->length++;
178 return is;
179 }
180
181 /* Delete integer from intset */
182 intset *intsetRemove(intset *is, int64_t value, int *success) {
183 uint8_t valenc = _intsetValueEncoding(value);
184 uint32_t pos;
185 if (success) *success = 0;
186
187 if (valenc <= is->encoding && intsetSearch(is,value,&pos)) {
188 /* We know we can delete */
189 if (success) *success = 1;
190
191 /* Overwrite value with tail and update length */
192 if (pos < (is->length-1)) intsetMoveTail(is,pos+1,pos);
193 is = intsetResize(is,is->length-1);
194 is->length--;
195 }
196 return is;
197 }
198
199 /* Determine whether a value belongs to this set */
200 uint8_t intsetFind(intset *is, int64_t value) {
201 uint8_t valenc = _intsetValueEncoding(value);
202 return valenc <= is->encoding && intsetSearch(is,value,NULL);
203 }
204
205 /* Return random member */
206 int64_t intsetRandom(intset *is) {
207 return _intsetGet(is,rand()%is->length);
208 }
209
210 /* Sets the value to the value at the given position. When this position is
211 * out of range the function returns 0, when in range it returns 1. */
212 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
213 if (pos < is->length) {
214 *value = _intsetGet(is,pos);
215 return 1;
216 }
217 return 0;
218 }
219
220 /* Return intset length */
221 uint32_t intsetLen(intset *is) {
222 return is->length;
223 }
224
225 /* Return intset blob size in bytes. */
226 size_t intsetBlobLen(intset *is) {
227 return sizeof(intset)+is->length*is->encoding;
228 }
229
230 #ifdef INTSET_TEST_MAIN
231 #include <sys/time.h>
232
233 void intsetRepr(intset *is) {
234 int i;
235 for (i = 0; i < is->length; i++) {
236 printf("%lld\n", (uint64_t)_intsetGet(is,i));
237 }
238 printf("\n");
239 }
240
/* Test helper: print "err" followed by a newline on stdout and terminate
 * the process with exit status 1. */
void error(char *err) {
    printf("%s\n", err);
    exit(1);
}
245
/* Test helper: report a passed test section by printing "OK" on stdout. */
void ok(void) {
    printf("OK\n");
}
249
/* Return the current time of day, as reported by gettimeofday(), expressed
 * in microseconds. */
long long usec(void) {
    struct timeval now;
    long long micros;

    gettimeofday(&now,NULL);
    micros = ((long long)now.tv_sec)*1000000;
    micros += now.tv_usec;
    return micros;
}
255
/* Print a banner naming the failed expression and where it was written.
 * Termination is left to the assert() macro below. */
void _assert(char *estr, char *file, int line) {
    printf("\n\n=== ASSERTION FAILED ===\n");
    printf("==> %s:%d '%s' is not true\n",file,line,estr);
}

/* Minimal assert for the test program: when the expression is false, report
 * it through _assert() and exit with status 1. */
#define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
261
262 intset *createSet(int bits, int size) {
263 uint64_t mask = (1<<bits)-1;
264 uint64_t i, value;
265 intset *is = intsetNew();
266
267 for (i = 0; i < size; i++) {
268 if (bits > 32) {
269 value = (rand()*rand()) & mask;
270 } else {
271 value = rand() & mask;
272 }
273 is = intsetAdd(is,value,NULL);
274 }
275 return is;
276 }
277
278 void checkConsistency(intset *is) {
279 int i;
280
281 for (i = 0; i < (is->length-1); i++) {
282 if (is->encoding == INTSET_ENC_INT16) {
283 int16_t *i16 = (int16_t*)is->contents;
284 assert(i16[i] < i16[i+1]);
285 } else if (is->encoding == INTSET_ENC_INT32) {
286 int32_t *i32 = (int32_t*)is->contents;
287 assert(i32[i] < i32[i+1]);
288 } else {
289 int64_t *i64 = (int64_t*)is->contents;
290 assert(i64[i] < i64[i+1]);
291 }
292 }
293 }
294
295 int main(int argc, char **argv) {
296 uint8_t success;
297 int i;
298 intset *is;
299 sranddev();
300
301 printf("Value encodings: "); {
302 assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
303 assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
304 assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
305 assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
306 assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
307 assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
308 assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
309 assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
310 assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
311 assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
312 ok();
313 }
314
315 printf("Basic adding: "); {
316 is = intsetNew();
317 is = intsetAdd(is,5,&success); assert(success);
318 is = intsetAdd(is,6,&success); assert(success);
319 is = intsetAdd(is,4,&success); assert(success);
320 is = intsetAdd(is,4,&success); assert(!success);
321 ok();
322 }
323
324 printf("Large number of random adds: "); {
325 int inserts = 0;
326 is = intsetNew();
327 for (i = 0; i < 1024; i++) {
328 is = intsetAdd(is,rand()%0x800,&success);
329 if (success) inserts++;
330 }
331 assert(is->length == inserts);
332 checkConsistency(is);
333 ok();
334 }
335
336 printf("Upgrade from int16 to int32: "); {
337 is = intsetNew();
338 is = intsetAdd(is,32,NULL);
339 assert(is->encoding == INTSET_ENC_INT16);
340 is = intsetAdd(is,65535,NULL);
341 assert(is->encoding == INTSET_ENC_INT32);
342 assert(intsetFind(is,32));
343 assert(intsetFind(is,65535));
344 checkConsistency(is);
345
346 is = intsetNew();
347 is = intsetAdd(is,32,NULL);
348 assert(is->encoding == INTSET_ENC_INT16);
349 is = intsetAdd(is,-65535,NULL);
350 assert(is->encoding == INTSET_ENC_INT32);
351 assert(intsetFind(is,32));
352 assert(intsetFind(is,-65535));
353 checkConsistency(is);
354 ok();
355 }
356
357 printf("Upgrade from int16 to int64: "); {
358 is = intsetNew();
359 is = intsetAdd(is,32,NULL);
360 assert(is->encoding == INTSET_ENC_INT16);
361 is = intsetAdd(is,4294967295,NULL);
362 assert(is->encoding == INTSET_ENC_INT64);
363 assert(intsetFind(is,32));
364 assert(intsetFind(is,4294967295));
365 checkConsistency(is);
366
367 is = intsetNew();
368 is = intsetAdd(is,32,NULL);
369 assert(is->encoding == INTSET_ENC_INT16);
370 is = intsetAdd(is,-4294967295,NULL);
371 assert(is->encoding == INTSET_ENC_INT64);
372 assert(intsetFind(is,32));
373 assert(intsetFind(is,-4294967295));
374 checkConsistency(is);
375 ok();
376 }
377
378 printf("Upgrade from int32 to int64: "); {
379 is = intsetNew();
380 is = intsetAdd(is,65535,NULL);
381 assert(is->encoding == INTSET_ENC_INT32);
382 is = intsetAdd(is,4294967295,NULL);
383 assert(is->encoding == INTSET_ENC_INT64);
384 assert(intsetFind(is,65535));
385 assert(intsetFind(is,4294967295));
386 checkConsistency(is);
387
388 is = intsetNew();
389 is = intsetAdd(is,65535,NULL);
390 assert(is->encoding == INTSET_ENC_INT32);
391 is = intsetAdd(is,-4294967295,NULL);
392 assert(is->encoding == INTSET_ENC_INT64);
393 assert(intsetFind(is,65535));
394 assert(intsetFind(is,-4294967295));
395 checkConsistency(is);
396 ok();
397 }
398
399 printf("Stress lookups: "); {
400 long num = 100000, size = 10000;
401 int i, bits = 20;
402 long long start;
403 is = createSet(bits,size);
404 checkConsistency(is);
405
406 start = usec();
407 for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
408 printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
409 }
410
411 printf("Stress add+delete: "); {
412 int i, v1, v2;
413 is = intsetNew();
414 for (i = 0; i < 0xffff; i++) {
415 v1 = rand() % 0xfff;
416 is = intsetAdd(is,v1,NULL);
417 assert(intsetFind(is,v1));
418
419 v2 = rand() % 0xfff;
420 is = intsetRemove(is,v2,NULL);
421 assert(!intsetFind(is,v2));
422 }
423 checkConsistency(is);
424 ok();
425 }
426 }
427 #endif