]>
git.saurik.com Git - redis.git/blob - src/intset.c
2 * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
3 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Redis nor the names of its contributors may be used
15 * to endorse or promote products derived from this software without
16 * specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
36 #include "endianconv.h"
38 /* Note that these encodings are ordered, so:
39 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
40 #define INTSET_ENC_INT16 (sizeof(int16_t))
41 #define INTSET_ENC_INT32 (sizeof(int32_t))
42 #define INTSET_ENC_INT64 (sizeof(int64_t))
44 /* Return the required encoding for the provided value. */
45 static uint8_t _intsetValueEncoding(int64_t v
) {
46 if (v
< INT32_MIN
|| v
> INT32_MAX
)
47 return INTSET_ENC_INT64
;
48 else if (v
< INT16_MIN
|| v
> INT16_MAX
)
49 return INTSET_ENC_INT32
;
51 return INTSET_ENC_INT16
;
54 /* Return the value at pos, given an encoding. */
55 static int64_t _intsetGetEncoded(intset
*is
, int pos
, uint8_t enc
) {
60 if (enc
== INTSET_ENC_INT64
) {
61 memcpy(&v64
,((int64_t*)is
->contents
)+pos
,sizeof(v64
));
64 } else if (enc
== INTSET_ENC_INT32
) {
65 memcpy(&v32
,((int32_t*)is
->contents
)+pos
,sizeof(v32
));
69 memcpy(&v16
,((int16_t*)is
->contents
)+pos
,sizeof(v16
));
75 /* Return the value at pos, using the configured encoding. */
76 static int64_t _intsetGet(intset
*is
, int pos
) {
77 return _intsetGetEncoded(is
,pos
,intrev32ifbe(is
->encoding
));
80 /* Set the value at pos, using the configured encoding. */
81 static void _intsetSet(intset
*is
, int pos
, int64_t value
) {
82 uint32_t encoding
= intrev32ifbe(is
->encoding
);
84 if (encoding
== INTSET_ENC_INT64
) {
85 ((int64_t*)is
->contents
)[pos
] = value
;
86 memrev64ifbe(((int64_t*)is
->contents
)+pos
);
87 } else if (encoding
== INTSET_ENC_INT32
) {
88 ((int32_t*)is
->contents
)[pos
] = value
;
89 memrev32ifbe(((int32_t*)is
->contents
)+pos
);
91 ((int16_t*)is
->contents
)[pos
] = value
;
92 memrev16ifbe(((int16_t*)is
->contents
)+pos
);
96 /* Create an empty intset. */
97 intset
*intsetNew(void) {
98 intset
*is
= zmalloc(sizeof(intset
));
99 is
->encoding
= intrev32ifbe(INTSET_ENC_INT16
);
104 /* Resize the intset */
105 static intset
*intsetResize(intset
*is
, uint32_t len
) {
106 uint32_t size
= len
*intrev32ifbe(is
->encoding
);
107 is
= zrealloc(is
,sizeof(intset
)+size
);
111 /* Search for the position of "value". Return 1 when the value was found and
112 * sets "pos" to the position of the value within the intset. Return 0 when
113 * the value is not present in the intset and sets "pos" to the position
114 * where "value" can be inserted. */
115 static uint8_t intsetSearch(intset
*is
, int64_t value
, uint32_t *pos
) {
116 int min
= 0, max
= intrev32ifbe(is
->length
)-1, mid
= -1;
119 /* The value can never be found when the set is empty */
120 if (intrev32ifbe(is
->length
) == 0) {
124 /* Check for the case where we know we cannot find the value,
125 * but do know the insert position. */
126 if (value
> _intsetGet(is
,intrev32ifbe(is
->length
)-1)) {
127 if (pos
) *pos
= intrev32ifbe(is
->length
);
129 } else if (value
< _intsetGet(is
,0)) {
137 cur
= _intsetGet(is
,mid
);
140 } else if (value
< cur
) {
156 /* Upgrades the intset to a larger encoding and inserts the given integer. */
157 static intset
*intsetUpgradeAndAdd(intset
*is
, int64_t value
) {
158 uint8_t curenc
= intrev32ifbe(is
->encoding
);
159 uint8_t newenc
= _intsetValueEncoding(value
);
160 int length
= intrev32ifbe(is
->length
);
161 int prepend
= value
< 0 ? 1 : 0;
163 /* First set new encoding and resize */
164 is
->encoding
= intrev32ifbe(newenc
);
165 is
= intsetResize(is
,intrev32ifbe(is
->length
)+1);
167 /* Upgrade back-to-front so we don't overwrite values.
168 * Note that the "prepend" variable is used to make sure we have an empty
169 * space at either the beginning or the end of the intset. */
171 _intsetSet(is
,length
+prepend
,_intsetGetEncoded(is
,length
,curenc
));
173 /* Set the value at the beginning or the end. */
175 _intsetSet(is
,0,value
);
177 _intsetSet(is
,intrev32ifbe(is
->length
),value
);
178 is
->length
= intrev32ifbe(intrev32ifbe(is
->length
)+1);
182 static void intsetMoveTail(intset
*is
, uint32_t from
, uint32_t to
) {
184 uint32_t bytes
= intrev32ifbe(is
->length
)-from
;
185 uint32_t encoding
= intrev32ifbe(is
->encoding
);
187 if (encoding
== INTSET_ENC_INT64
) {
188 src
= (int64_t*)is
->contents
+from
;
189 dst
= (int64_t*)is
->contents
+to
;
190 bytes
*= sizeof(int64_t);
191 } else if (encoding
== INTSET_ENC_INT32
) {
192 src
= (int32_t*)is
->contents
+from
;
193 dst
= (int32_t*)is
->contents
+to
;
194 bytes
*= sizeof(int32_t);
196 src
= (int16_t*)is
->contents
+from
;
197 dst
= (int16_t*)is
->contents
+to
;
198 bytes
*= sizeof(int16_t);
200 memmove(dst
,src
,bytes
);
203 /* Insert an integer in the intset */
204 intset
*intsetAdd(intset
*is
, int64_t value
, uint8_t *success
) {
205 uint8_t valenc
= _intsetValueEncoding(value
);
207 if (success
) *success
= 1;
209 /* Upgrade encoding if necessary. If we need to upgrade, we know that
210 * this value should be either appended (if > 0) or prepended (if < 0),
211 * because it lies outside the range of existing values. */
212 if (valenc
> intrev32ifbe(is
->encoding
)) {
213 /* This always succeeds, so we don't need to curry *success. */
214 return intsetUpgradeAndAdd(is
,value
);
216 /* Abort if the value is already present in the set.
217 * This call will populate "pos" with the right position to insert
218 * the value when it cannot be found. */
219 if (intsetSearch(is
,value
,&pos
)) {
220 if (success
) *success
= 0;
224 is
= intsetResize(is
,intrev32ifbe(is
->length
)+1);
225 if (pos
< intrev32ifbe(is
->length
)) intsetMoveTail(is
,pos
,pos
+1);
228 _intsetSet(is
,pos
,value
);
229 is
->length
= intrev32ifbe(intrev32ifbe(is
->length
)+1);
233 /* Delete integer from intset */
234 intset
*intsetRemove(intset
*is
, int64_t value
, int *success
) {
235 uint8_t valenc
= _intsetValueEncoding(value
);
237 if (success
) *success
= 0;
239 if (valenc
<= intrev32ifbe(is
->encoding
) && intsetSearch(is
,value
,&pos
)) {
240 uint32_t len
= intrev32ifbe(is
->length
);
242 /* We know we can delete */
243 if (success
) *success
= 1;
245 /* Overwrite value with tail and update length */
246 if (pos
< (len
-1)) intsetMoveTail(is
,pos
+1,pos
);
247 is
= intsetResize(is
,len
-1);
248 is
->length
= intrev32ifbe(len
-1);
253 /* Determine whether a value belongs to this set */
254 uint8_t intsetFind(intset
*is
, int64_t value
) {
255 uint8_t valenc
= _intsetValueEncoding(value
);
256 return valenc
<= intrev32ifbe(is
->encoding
) && intsetSearch(is
,value
,NULL
);
259 /* Return random member */
260 int64_t intsetRandom(intset
*is
) {
261 return _intsetGet(is
,rand()%intrev
32ifbe
(is
->length
));
264 /* Sets the value to the value at the given position. When this position is
265 * out of range the function returns 0, when in range it returns 1. */
266 uint8_t intsetGet(intset
*is
, uint32_t pos
, int64_t *value
) {
267 if (pos
< intrev32ifbe(is
->length
)) {
268 *value
= _intsetGet(is
,pos
);
274 /* Return intset length */
275 uint32_t intsetLen(intset
*is
) {
276 return intrev32ifbe(is
->length
);
279 /* Return intset blob size in bytes. */
280 size_t intsetBlobLen(intset
*is
) {
281 return sizeof(intset
)+intrev32ifbe(is
->length
)*intrev32ifbe(is
->encoding
);
284 #ifdef INTSET_TEST_MAIN
285 #include <sys/time.h>
287 void intsetRepr(intset
*is
) {
289 for (i
= 0; i
< intrev32ifbe(is
->length
); i
++) {
290 printf("%lld\n", (uint64_t)_intsetGet(is
,i
));
295 void error(char *err
) {
304 long long usec(void) {
306 gettimeofday(&tv
,NULL
);
307 return (((long long)tv
.tv_sec
)*1000000)+tv
.tv_usec
;
310 #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
311 void _assert(char *estr
, char *file
, int line
) {
312 printf("\n\n=== ASSERTION FAILED ===\n");
313 printf("==> %s:%d '%s' is not true\n",file
,line
,estr
);
316 intset
*createSet(int bits
, int size
) {
317 uint64_t mask
= (1<<bits
)-1;
319 intset
*is
= intsetNew();
321 for (i
= 0; i
< size
; i
++) {
323 value
= (rand()*rand()) & mask
;
325 value
= rand() & mask
;
327 is
= intsetAdd(is
,value
,NULL
);
332 void checkConsistency(intset
*is
) {
335 for (i
= 0; i
< (intrev32ifbe(is
->length
)-1); i
++) {
336 uint32_t encoding
= intrev32ifbe(is
->encoding
);
338 if (encoding
== INTSET_ENC_INT16
) {
339 int16_t *i16
= (int16_t*)is
->contents
;
340 assert(i16
[i
] < i16
[i
+1]);
341 } else if (encoding
== INTSET_ENC_INT32
) {
342 int32_t *i32
= (int32_t*)is
->contents
;
343 assert(i32
[i
] < i32
[i
+1]);
345 int64_t *i64
= (int64_t*)is
->contents
;
346 assert(i64
[i
] < i64
[i
+1]);
351 int main(int argc
, char **argv
) {
357 printf("Value encodings: "); {
358 assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16
);
359 assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16
);
360 assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32
);
361 assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32
);
362 assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32
);
363 assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32
);
364 assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64
);
365 assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64
);
366 assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64
);
367 assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64
);
371 printf("Basic adding: "); {
373 is
= intsetAdd(is
,5,&success
); assert(success
);
374 is
= intsetAdd(is
,6,&success
); assert(success
);
375 is
= intsetAdd(is
,4,&success
); assert(success
);
376 is
= intsetAdd(is
,4,&success
); assert(!success
);
380 printf("Large number of random adds: "); {
383 for (i
= 0; i
< 1024; i
++) {
384 is
= intsetAdd(is
,rand()%0x800
,&success
);
385 if (success
) inserts
++;
387 assert(intrev32ifbe(is
->length
) == inserts
);
388 checkConsistency(is
);
392 printf("Upgrade from int16 to int32: "); {
394 is
= intsetAdd(is
,32,NULL
);
395 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT16
);
396 is
= intsetAdd(is
,65535,NULL
);
397 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT32
);
398 assert(intsetFind(is
,32));
399 assert(intsetFind(is
,65535));
400 checkConsistency(is
);
403 is
= intsetAdd(is
,32,NULL
);
404 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT16
);
405 is
= intsetAdd(is
,-65535,NULL
);
406 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT32
);
407 assert(intsetFind(is
,32));
408 assert(intsetFind(is
,-65535));
409 checkConsistency(is
);
413 printf("Upgrade from int16 to int64: "); {
415 is
= intsetAdd(is
,32,NULL
);
416 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT16
);
417 is
= intsetAdd(is
,4294967295,NULL
);
418 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT64
);
419 assert(intsetFind(is
,32));
420 assert(intsetFind(is
,4294967295));
421 checkConsistency(is
);
424 is
= intsetAdd(is
,32,NULL
);
425 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT16
);
426 is
= intsetAdd(is
,-4294967295,NULL
);
427 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT64
);
428 assert(intsetFind(is
,32));
429 assert(intsetFind(is
,-4294967295));
430 checkConsistency(is
);
434 printf("Upgrade from int32 to int64: "); {
436 is
= intsetAdd(is
,65535,NULL
);
437 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT32
);
438 is
= intsetAdd(is
,4294967295,NULL
);
439 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT64
);
440 assert(intsetFind(is
,65535));
441 assert(intsetFind(is
,4294967295));
442 checkConsistency(is
);
445 is
= intsetAdd(is
,65535,NULL
);
446 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT32
);
447 is
= intsetAdd(is
,-4294967295,NULL
);
448 assert(intrev32ifbe(is
->encoding
) == INTSET_ENC_INT64
);
449 assert(intsetFind(is
,65535));
450 assert(intsetFind(is
,-4294967295));
451 checkConsistency(is
);
455 printf("Stress lookups: "); {
456 long num
= 100000, size
= 10000;
459 is
= createSet(bits
,size
);
460 checkConsistency(is
);
463 for (i
= 0; i
< num
; i
++) intsetSearch(is
,rand() % ((1<<bits
)-1),NULL
);
464 printf("%ld lookups, %ld element set, %lldusec\n",num
,size
,usec()-start
);
467 printf("Stress add+delete: "); {
470 for (i
= 0; i
< 0xffff; i
++) {
472 is
= intsetAdd(is
,v1
,NULL
);
473 assert(intsetFind(is
,v1
));
476 is
= intsetRemove(is
,v2
,NULL
);
477 assert(!intsetFind(is
,v2
));
479 checkConsistency(is
);