8 /* Note that these encodings are ordered, so:
9 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
10 #define INTSET_ENC_INT16 (sizeof(int16_t))
11 #define INTSET_ENC_INT32 (sizeof(int32_t))
12 #define INTSET_ENC_INT64 (sizeof(int64_t))
14 /* Return the required encoding for the provided value. */
15 static uint8_t _intsetValueEncoding(int64_t v
) {
16 if (v
< INT32_MIN
|| v
> INT32_MAX
)
17 return INTSET_ENC_INT64
;
18 else if (v
< INT16_MIN
|| v
> INT16_MAX
)
19 return INTSET_ENC_INT32
;
21 return INTSET_ENC_INT16
;
24 /* Return the value at pos, given an encoding. */
25 static int64_t _intsetGetEncoded(intset
*is
, int pos
, uint8_t enc
) {
30 if (enc
== INTSET_ENC_INT64
) {
31 memcpy(&v64
,((int64_t*)is
->contents
)+pos
,sizeof(v64
));
34 } else if (enc
== INTSET_ENC_INT32
) {
35 memcpy(&v32
,((int32_t*)is
->contents
)+pos
,sizeof(v32
));
39 memcpy(&v16
,((int16_t*)is
->contents
)+pos
,sizeof(v16
));
45 /* Return the value at pos, using the configured encoding. */
46 static int64_t _intsetGet(intset
*is
, int pos
) {
47 return _intsetGetEncoded(is
,pos
,is
->encoding
);
50 /* Set the value at pos, using the configured encoding. */
51 static void _intsetSet(intset
*is
, int pos
, int64_t value
) {
52 if (is
->encoding
== INTSET_ENC_INT64
) {
53 ((int64_t*)is
->contents
)[pos
] = value
;
54 memrev64ifbe(((int64_t*)is
->contents
)+pos
);
55 } else if (is
->encoding
== INTSET_ENC_INT32
) {
56 ((int32_t*)is
->contents
)[pos
] = value
;
57 memrev32ifbe(((int32_t*)is
->contents
)+pos
);
59 ((int16_t*)is
->contents
)[pos
] = value
;
60 memrev16ifbe(((int16_t*)is
->contents
)+pos
);
64 /* Create an empty intset. */
65 intset
*intsetNew(void) {
66 intset
*is
= zmalloc(sizeof(intset
));
67 is
->encoding
= INTSET_ENC_INT16
;
72 /* Resize the intset */
73 static intset
*intsetResize(intset
*is
, uint32_t len
) {
74 uint32_t size
= len
*is
->encoding
;
75 is
= zrealloc(is
,sizeof(intset
)+size
);
79 /* Search for the position of "value". Return 1 when the value was found and
80 * sets "pos" to the position of the value within the intset. Return 0 when
81 * the value is not present in the intset and sets "pos" to the position
82 * where "value" can be inserted. */
83 static uint8_t intsetSearch(intset
*is
, int64_t value
, uint32_t *pos
) {
84 int min
= 0, max
= is
->length
-1, mid
= -1;
87 /* The value can never be found when the set is empty */
88 if (is
->length
== 0) {
92 /* Check for the case where we know we cannot find the value,
93 * but do know the insert position. */
94 if (value
> _intsetGet(is
,is
->length
-1)) {
95 if (pos
) *pos
= is
->length
;
97 } else if (value
< _intsetGet(is
,0)) {
105 cur
= _intsetGet(is
,mid
);
108 } else if (value
< cur
) {
124 /* Upgrades the intset to a larger encoding and inserts the given integer. */
125 static intset
*intsetUpgradeAndAdd(intset
*is
, int64_t value
) {
126 uint8_t curenc
= is
->encoding
;
127 uint8_t newenc
= _intsetValueEncoding(value
);
128 int length
= is
->length
;
129 int prepend
= value
< 0 ? 1 : 0;
131 /* First set new encoding and resize */
132 is
->encoding
= newenc
;
133 is
= intsetResize(is
,is
->length
+1);
135 /* Upgrade back-to-front so we don't overwrite values.
136 * Note that the "prepend" variable is used to make sure we have an empty
137 * space at either the beginning or the end of the intset. */
139 _intsetSet(is
,length
+prepend
,_intsetGetEncoded(is
,length
,curenc
));
141 /* Set the value at the beginning or the end. */
143 _intsetSet(is
,0,value
);
145 _intsetSet(is
,is
->length
,value
);
150 static void intsetMoveTail(intset
*is
, uint32_t from
, uint32_t to
) {
152 uint32_t bytes
= is
->length
-from
;
153 if (is
->encoding
== INTSET_ENC_INT64
) {
154 src
= (int64_t*)is
->contents
+from
;
155 dst
= (int64_t*)is
->contents
+to
;
156 bytes
*= sizeof(int64_t);
157 } else if (is
->encoding
== INTSET_ENC_INT32
) {
158 src
= (int32_t*)is
->contents
+from
;
159 dst
= (int32_t*)is
->contents
+to
;
160 bytes
*= sizeof(int32_t);
162 src
= (int16_t*)is
->contents
+from
;
163 dst
= (int16_t*)is
->contents
+to
;
164 bytes
*= sizeof(int16_t);
166 memmove(dst
,src
,bytes
);
169 /* Insert an integer in the intset */
170 intset
*intsetAdd(intset
*is
, int64_t value
, uint8_t *success
) {
171 uint8_t valenc
= _intsetValueEncoding(value
);
173 if (success
) *success
= 1;
175 /* Upgrade encoding if necessary. If we need to upgrade, we know that
176 * this value should be either appended (if > 0) or prepended (if < 0),
177 * because it lies outside the range of existing values. */
178 if (valenc
> is
->encoding
) {
179 /* This always succeeds, so we don't need to curry *success. */
180 return intsetUpgradeAndAdd(is
,value
);
182 /* Abort if the value is already present in the set.
183 * This call will populate "pos" with the right position to insert
184 * the value when it cannot be found. */
185 if (intsetSearch(is
,value
,&pos
)) {
186 if (success
) *success
= 0;
190 is
= intsetResize(is
,is
->length
+1);
191 if (pos
< is
->length
) intsetMoveTail(is
,pos
,pos
+1);
194 _intsetSet(is
,pos
,value
);
199 /* Delete integer from intset */
200 intset
*intsetRemove(intset
*is
, int64_t value
, int *success
) {
201 uint8_t valenc
= _intsetValueEncoding(value
);
203 if (success
) *success
= 0;
205 if (valenc
<= is
->encoding
&& intsetSearch(is
,value
,&pos
)) {
206 /* We know we can delete */
207 if (success
) *success
= 1;
209 /* Overwrite value with tail and update length */
210 if (pos
< (is
->length
-1)) intsetMoveTail(is
,pos
+1,pos
);
211 is
= intsetResize(is
,is
->length
-1);
217 /* Determine whether a value belongs to this set */
218 uint8_t intsetFind(intset
*is
, int64_t value
) {
219 uint8_t valenc
= _intsetValueEncoding(value
);
220 return valenc
<= is
->encoding
&& intsetSearch(is
,value
,NULL
);
223 /* Return random member */
224 int64_t intsetRandom(intset
*is
) {
225 return _intsetGet(is
,rand()%is
->length
);
228 /* Sets the value to the value at the given position. When this position is
229 * out of range the function returns 0, when in range it returns 1. */
230 uint8_t intsetGet(intset
*is
, uint32_t pos
, int64_t *value
) {
231 if (pos
< is
->length
) {
232 *value
= _intsetGet(is
,pos
);
238 /* Return intset length */
239 uint32_t intsetLen(intset
*is
) {
243 /* Return intset blob size in bytes. */
244 size_t intsetBlobLen(intset
*is
) {
245 return sizeof(intset
)+is
->length
*is
->encoding
;
248 #ifdef INTSET_TEST_MAIN
249 #include <sys/time.h>
251 void intsetRepr(intset
*is
) {
253 for (i
= 0; i
< is
->length
; i
++) {
254 printf("%lld\n", (uint64_t)_intsetGet(is
,i
));
259 void error(char *err
) {
268 long long usec(void) {
270 gettimeofday(&tv
,NULL
);
271 return (((long long)tv
.tv_sec
)*1000000)+tv
.tv_usec
;
274 #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
275 void _assert(char *estr
, char *file
, int line
) {
276 printf("\n\n=== ASSERTION FAILED ===\n");
277 printf("==> %s:%d '%s' is not true\n",file
,line
,estr
);
280 intset
*createSet(int bits
, int size
) {
281 uint64_t mask
= (1<<bits
)-1;
283 intset
*is
= intsetNew();
285 for (i
= 0; i
< size
; i
++) {
287 value
= (rand()*rand()) & mask
;
289 value
= rand() & mask
;
291 is
= intsetAdd(is
,value
,NULL
);
296 void checkConsistency(intset
*is
) {
299 for (i
= 0; i
< (is
->length
-1); i
++) {
300 if (is
->encoding
== INTSET_ENC_INT16
) {
301 int16_t *i16
= (int16_t*)is
->contents
;
302 assert(i16
[i
] < i16
[i
+1]);
303 } else if (is
->encoding
== INTSET_ENC_INT32
) {
304 int32_t *i32
= (int32_t*)is
->contents
;
305 assert(i32
[i
] < i32
[i
+1]);
307 int64_t *i64
= (int64_t*)is
->contents
;
308 assert(i64
[i
] < i64
[i
+1]);
313 int main(int argc
, char **argv
) {
319 printf("Value encodings: "); {
320 assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16
);
321 assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16
);
322 assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32
);
323 assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32
);
324 assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32
);
325 assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32
);
326 assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64
);
327 assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64
);
328 assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64
);
329 assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64
);
333 printf("Basic adding: "); {
335 is
= intsetAdd(is
,5,&success
); assert(success
);
336 is
= intsetAdd(is
,6,&success
); assert(success
);
337 is
= intsetAdd(is
,4,&success
); assert(success
);
338 is
= intsetAdd(is
,4,&success
); assert(!success
);
342 printf("Large number of random adds: "); {
345 for (i
= 0; i
< 1024; i
++) {
346 is
= intsetAdd(is
,rand()%0x800
,&success
);
347 if (success
) inserts
++;
349 assert(is
->length
== inserts
);
350 checkConsistency(is
);
354 printf("Upgrade from int16 to int32: "); {
356 is
= intsetAdd(is
,32,NULL
);
357 assert(is
->encoding
== INTSET_ENC_INT16
);
358 is
= intsetAdd(is
,65535,NULL
);
359 assert(is
->encoding
== INTSET_ENC_INT32
);
360 assert(intsetFind(is
,32));
361 assert(intsetFind(is
,65535));
362 checkConsistency(is
);
365 is
= intsetAdd(is
,32,NULL
);
366 assert(is
->encoding
== INTSET_ENC_INT16
);
367 is
= intsetAdd(is
,-65535,NULL
);
368 assert(is
->encoding
== INTSET_ENC_INT32
);
369 assert(intsetFind(is
,32));
370 assert(intsetFind(is
,-65535));
371 checkConsistency(is
);
375 printf("Upgrade from int16 to int64: "); {
377 is
= intsetAdd(is
,32,NULL
);
378 assert(is
->encoding
== INTSET_ENC_INT16
);
379 is
= intsetAdd(is
,4294967295,NULL
);
380 assert(is
->encoding
== INTSET_ENC_INT64
);
381 assert(intsetFind(is
,32));
382 assert(intsetFind(is
,4294967295));
383 checkConsistency(is
);
386 is
= intsetAdd(is
,32,NULL
);
387 assert(is
->encoding
== INTSET_ENC_INT16
);
388 is
= intsetAdd(is
,-4294967295,NULL
);
389 assert(is
->encoding
== INTSET_ENC_INT64
);
390 assert(intsetFind(is
,32));
391 assert(intsetFind(is
,-4294967295));
392 checkConsistency(is
);
396 printf("Upgrade from int32 to int64: "); {
398 is
= intsetAdd(is
,65535,NULL
);
399 assert(is
->encoding
== INTSET_ENC_INT32
);
400 is
= intsetAdd(is
,4294967295,NULL
);
401 assert(is
->encoding
== INTSET_ENC_INT64
);
402 assert(intsetFind(is
,65535));
403 assert(intsetFind(is
,4294967295));
404 checkConsistency(is
);
407 is
= intsetAdd(is
,65535,NULL
);
408 assert(is
->encoding
== INTSET_ENC_INT32
);
409 is
= intsetAdd(is
,-4294967295,NULL
);
410 assert(is
->encoding
== INTSET_ENC_INT64
);
411 assert(intsetFind(is
,65535));
412 assert(intsetFind(is
,-4294967295));
413 checkConsistency(is
);
417 printf("Stress lookups: "); {
418 long num
= 100000, size
= 10000;
421 is
= createSet(bits
,size
);
422 checkConsistency(is
);
425 for (i
= 0; i
< num
; i
++) intsetSearch(is
,rand() % ((1<<bits
)-1),NULL
);
426 printf("%ld lookups, %ld element set, %lldusec\n",num
,size
,usec()-start
);
429 printf("Stress add+delete: "); {
432 for (i
= 0; i
< 0xffff; i
++) {
434 is
= intsetAdd(is
,v1
,NULL
);
435 assert(intsetFind(is
,v1
));
438 is
= intsetRemove(is
,v2
,NULL
);
439 assert(!intsetFind(is
,v2
));
441 checkConsistency(is
);