]> git.saurik.com Git - redis.git/blob - src/intset.c
a837592dc57b36b359bc88c9c0cbb3dcfbd34592
[redis.git] / src / intset.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include "intset.h"
5 #include "zmalloc.h"
6
7 /* Note that these encodings are ordered, so:
8 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
9 #define INTSET_ENC_INT16 (sizeof(int16_t))
10 #define INTSET_ENC_INT32 (sizeof(int32_t))
11 #define INTSET_ENC_INT64 (sizeof(int64_t))
12
13 /* Return the required encoding for the provided value. */
14 static uint8_t _intsetValueEncoding(int64_t v) {
15 if (v < INT32_MIN || v > INT32_MAX)
16 return INTSET_ENC_INT64;
17 else if (v < INT16_MIN || v > INT16_MAX)
18 return INTSET_ENC_INT32;
19 return INTSET_ENC_INT16;
20 }
21
22 /* Return the value at pos, given an encoding. */
23 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
24 if (enc == INTSET_ENC_INT64)
25 return ((int64_t*)is->contents)[pos];
26 else if (enc == INTSET_ENC_INT32)
27 return ((int32_t*)is->contents)[pos];
28 return ((int16_t*)is->contents)[pos];
29 }
30
31 /* Return the value at pos, using the configured encoding. */
32 static int64_t _intsetGet(intset *is, int pos) {
33 return _intsetGetEncoded(is,pos,is->encoding);
34 }
35
36 /* Set the value at pos, using the configured encoding. */
37 static void _intsetSet(intset *is, int pos, int64_t value) {
38 if (is->encoding == INTSET_ENC_INT64)
39 ((int64_t*)is->contents)[pos] = value;
40 else if (is->encoding == INTSET_ENC_INT32)
41 ((int32_t*)is->contents)[pos] = value;
42 else
43 ((int16_t*)is->contents)[pos] = value;
44 }
45
46 /* Create an empty intset. */
47 intset *intsetNew(void) {
48 intset *is = zmalloc(sizeof(intset));
49 is->encoding = INTSET_ENC_INT16;
50 is->length = 0;
51 return is;
52 }
53
54 /* Resize the intset */
55 static intset *intsetResize(intset *is, uint32_t len) {
56 uint32_t size = len*is->encoding;
57 is = zrealloc(is,sizeof(intset)+size);
58 return is;
59 }
60
61 static intset *intsetUpgrade(intset *is, uint8_t newenc, uint8_t extra, uint8_t offset) {
62 uint8_t curenc = is->encoding;
63 int length = is->length;
64
65 /* First set new encoding and resize */
66 is->encoding = newenc;
67 is = intsetResize(is,is->length+extra);
68
69 /* Upgrade back-to-front so we don't overwrite values */
70 while(length--)
71 _intsetSet(is,length+offset,_intsetGetEncoded(is,length,curenc));
72 return is;
73 }
74
75 /* Search for the position of "value". Return 1 when the value was found and
76 * sets "pos" to the position of the value within the intset. Return 0 when
77 * the value is not present in the intset and sets "pos" to the position
78 * where "value" can be inserted. */
79 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
80 int min = 0, max = is->length-1, mid = -1;
81 int64_t cur = -1;
82
83 /* The value can never be found when the set is empty */
84 if (is->length == 0) {
85 if (pos) *pos = 0;
86 return 0;
87 } else {
88 /* Check for the case where we know we cannot find the value,
89 * but do know the insert position. */
90 if (value > _intsetGet(is,is->length-1)) {
91 if (pos) *pos = is->length;
92 return 0;
93 } else if (value < _intsetGet(is,0)) {
94 if (pos) *pos = 0;
95 return 0;
96 }
97 }
98
99 while(max >= min) {
100 mid = (min+max)/2;
101 cur = _intsetGet(is,mid);
102 if (value > cur) {
103 min = mid+1;
104 } else if (value < cur) {
105 max = mid-1;
106 } else {
107 break;
108 }
109 }
110
111 if (value == cur) {
112 if (pos) *pos = mid;
113 return 1;
114 } else {
115 if (pos) *pos = min;
116 return 0;
117 }
118 }
119
120 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
121 void *src, *dst;
122 uint32_t bytes = is->length-from;
123 if (is->encoding == INTSET_ENC_INT64) {
124 src = (int64_t*)is->contents+from;
125 dst = (int64_t*)is->contents+to;
126 bytes *= sizeof(int64_t);
127 } else if (is->encoding == INTSET_ENC_INT32) {
128 src = (int32_t*)is->contents+from;
129 dst = (int32_t*)is->contents+to;
130 bytes *= sizeof(int32_t);
131 } else {
132 src = (int16_t*)is->contents+from;
133 dst = (int16_t*)is->contents+to;
134 bytes *= sizeof(int16_t);
135 }
136 memmove(dst,src,bytes);
137 }
138
139 /* Insert an integer in the intset */
140 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
141 uint8_t valenc = _intsetValueEncoding(value);
142 uint32_t pos, offset;
143 if (success) *success = 1;
144
145 /* Upgrade encoding if necessary. If we need to upgrade, we know that
146 * this value should be either appended (if > 0) or prepended (if < 0),
147 * because it lies outside the range of existing values. */
148 if (valenc > is->encoding) {
149 offset = value < 0 ? 1 : 0;
150 is = intsetUpgrade(is,valenc,1,offset);
151 pos = (value < 0) ? 0 : is->length;
152 } else {
153 /* Abort if the value is already present in the set.
154 * This call will populate "pos" with the right position to insert
155 * the value when it cannot be found. */
156 if (intsetSearch(is,value,&pos)) {
157 if (success) *success = 0;
158 return is;
159 }
160
161 is = intsetResize(is,is->length+1);
162 if (pos < is->length) intsetMoveTail(is,pos,pos+1);
163 }
164
165 _intsetSet(is,pos,value);
166 is->length++;
167 return is;
168 }
169
170 /* Delete integer from intset */
171 intset *intsetRemove(intset *is, int64_t value, uint8_t *success) {
172 uint8_t valenc = _intsetValueEncoding(value);
173 uint32_t pos;
174 if (success) *success = 0;
175
176 if (valenc <= is->encoding && intsetSearch(is,value,&pos)) {
177 /* We know we can delete */
178 if (success) *success = 1;
179
180 /* Overwrite value with tail and update length */
181 if (pos < (is->length-1)) intsetMoveTail(is,pos+1,pos);
182 is = intsetResize(is,is->length-1);
183 is->length--;
184 }
185 return is;
186 }
187
188 /* Determine whether a value belongs to this set */
189 uint8_t intsetFind(intset *is, int64_t value) {
190 uint8_t valenc = _intsetValueEncoding(value);
191 return valenc <= is->encoding && intsetSearch(is,value,NULL);
192 }
193
194 /* Return random member */
195 int64_t intsetRandom(intset *is) {
196 return _intsetGet(is,rand()%is->length);
197 }
198
199 /* Sets the value to the value at the given position. When this position is
200 * out of range the function returns 0, when in range it returns 1. */
201 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
202 if (pos < is->length) {
203 *value = _intsetGet(is,pos);
204 return 1;
205 }
206 return 0;
207 }
208
209 /* Return intset length */
210 uint32_t intsetLen(intset *is) {
211 return is->length;
212 }
213
214 #ifdef INTSET_TEST_MAIN
215 #include <sys/time.h>
216
217 void intsetRepr(intset *is) {
218 int i;
219 for (i = 0; i < is->length; i++) {
220 printf("%lld\n", (uint64_t)_intsetGet(is,i));
221 }
222 printf("\n");
223 }
224
225 void error(char *err) {
226 printf("%s\n", err);
227 exit(1);
228 }
229
230 void ok(void) {
231 printf("OK\n");
232 }
233
234 long long usec(void) {
235 struct timeval tv;
236 gettimeofday(&tv,NULL);
237 return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
238 }
239
240 #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
241 void _assert(char *estr, char *file, int line) {
242 printf("\n\n=== ASSERTION FAILED ===\n");
243 printf("==> %s:%d '%s' is not true\n",file,line,estr);
244 }
245
246 intset *createSet(int bits, int size) {
247 uint64_t mask = (1<<bits)-1;
248 uint64_t i, value;
249 intset *is = intsetNew();
250
251 for (i = 0; i < size; i++) {
252 if (bits > 32) {
253 value = (rand()*rand()) & mask;
254 } else {
255 value = rand() & mask;
256 }
257 is = intsetAdd(is,value,NULL);
258 }
259 return is;
260 }
261
262 void checkConsistency(intset *is) {
263 int i;
264
265 for (i = 0; i < (is->length-1); i++) {
266 if (is->encoding == INTSET_ENC_INT16) {
267 int16_t *i16 = (int16_t*)is->contents;
268 assert(i16[i] < i16[i+1]);
269 } else if (is->encoding == INTSET_ENC_INT32) {
270 int32_t *i32 = (int32_t*)is->contents;
271 assert(i32[i] < i32[i+1]);
272 } else {
273 int64_t *i64 = (int64_t*)is->contents;
274 assert(i64[i] < i64[i+1]);
275 }
276 }
277 }
278
279 int main(int argc, char **argv) {
280 uint8_t success;
281 int i;
282 intset *is;
283 sranddev();
284
285 printf("Value encodings: "); {
286 assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
287 assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
288 assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
289 assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
290 assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
291 assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
292 assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
293 assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
294 assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
295 assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
296 ok();
297 }
298
299 printf("Basic adding: "); {
300 is = intsetNew();
301 is = intsetAdd(is,5,&success); assert(success);
302 is = intsetAdd(is,6,&success); assert(success);
303 is = intsetAdd(is,4,&success); assert(success);
304 is = intsetAdd(is,4,&success); assert(!success);
305 ok();
306 }
307
308 printf("Large number of random adds: "); {
309 int inserts = 0;
310 is = intsetNew();
311 for (i = 0; i < 1024; i++) {
312 is = intsetAdd(is,rand()%0x800,&success);
313 if (success) inserts++;
314 }
315 assert(is->length == inserts);
316 checkConsistency(is);
317 ok();
318 }
319
320 printf("Upgrade from int16 to int32: "); {
321 is = intsetNew();
322 is = intsetAdd(is,32,NULL);
323 assert(is->encoding == INTSET_ENC_INT16);
324 is = intsetAdd(is,65535,NULL);
325 assert(is->encoding == INTSET_ENC_INT32);
326 assert(intsetFind(is,32));
327 assert(intsetFind(is,65535));
328 checkConsistency(is);
329
330 is = intsetNew();
331 is = intsetAdd(is,32,NULL);
332 assert(is->encoding == INTSET_ENC_INT16);
333 is = intsetAdd(is,-65535,NULL);
334 assert(is->encoding == INTSET_ENC_INT32);
335 assert(intsetFind(is,32));
336 assert(intsetFind(is,-65535));
337 checkConsistency(is);
338 ok();
339 }
340
341 printf("Upgrade from int16 to int64: "); {
342 is = intsetNew();
343 is = intsetAdd(is,32,NULL);
344 assert(is->encoding == INTSET_ENC_INT16);
345 is = intsetAdd(is,4294967295,NULL);
346 assert(is->encoding == INTSET_ENC_INT64);
347 assert(intsetFind(is,32));
348 assert(intsetFind(is,4294967295));
349 checkConsistency(is);
350
351 is = intsetNew();
352 is = intsetAdd(is,32,NULL);
353 assert(is->encoding == INTSET_ENC_INT16);
354 is = intsetAdd(is,-4294967295,NULL);
355 assert(is->encoding == INTSET_ENC_INT64);
356 assert(intsetFind(is,32));
357 assert(intsetFind(is,-4294967295));
358 checkConsistency(is);
359 ok();
360 }
361
362 printf("Upgrade from int32 to int64: "); {
363 is = intsetNew();
364 is = intsetAdd(is,65535,NULL);
365 assert(is->encoding == INTSET_ENC_INT32);
366 is = intsetAdd(is,4294967295,NULL);
367 assert(is->encoding == INTSET_ENC_INT64);
368 assert(intsetFind(is,65535));
369 assert(intsetFind(is,4294967295));
370 checkConsistency(is);
371
372 is = intsetNew();
373 is = intsetAdd(is,65535,NULL);
374 assert(is->encoding == INTSET_ENC_INT32);
375 is = intsetAdd(is,-4294967295,NULL);
376 assert(is->encoding == INTSET_ENC_INT64);
377 assert(intsetFind(is,65535));
378 assert(intsetFind(is,-4294967295));
379 checkConsistency(is);
380 ok();
381 }
382
383 printf("Stress lookups: "); {
384 long num = 100000, size = 10000;
385 int i, bits = 20;
386 long long start;
387 is = createSet(bits,size);
388 checkConsistency(is);
389
390 start = usec();
391 for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
392 printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
393 }
394
395 printf("Stress add+delete: "); {
396 int i, v1, v2;
397 is = intsetNew();
398 for (i = 0; i < 0xffff; i++) {
399 v1 = rand() % 0xfff;
400 is = intsetAdd(is,v1,NULL);
401 assert(intsetFind(is,v1));
402
403 v2 = rand() % 0xfff;
404 is = intsetRemove(is,v2,NULL);
405 assert(!intsetFind(is,v2));
406 }
407 checkConsistency(is);
408 ok();
409 }
410 }
411 #endif