From d0b58d530027185a7fccd08bcda31efe06ef366b Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Sat, 12 Jun 2010 22:25:22 +0200 Subject: [PATCH] intset encoding for sets, refactored set tests to test both encodings --- Makefile | 3 +- intset.c | 19 ++- intset.h | 2 + redis.c | 137 ++++++++++++++++++--- tests/unit/type/set.tcl | 266 +++++++++++++++++++++++++--------------- 5 files changed, 308 insertions(+), 119 deletions(-) diff --git a/Makefile b/Makefile index 46df88bb..72524f7c 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ endif CCOPT= $(CFLAGS) $(CCLINK) $(ARCH) $(PROF) DEBUG?= -g -rdynamic -ggdb -OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o +OBJ = adlist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o intset.o BENCHOBJ = ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o CLIOBJ = anet.o sds.o adlist.o redis-cli.o zmalloc.o linenoise.o CHECKDUMPOBJ = redis-check-dump.o lzf_c.o lzf_d.o @@ -54,6 +54,7 @@ redis.o: redis.c fmacros.h config.h redis.h ae.h sds.h anet.h dict.h \ sds.o: sds.c sds.h zmalloc.h zipmap.o: zipmap.c zmalloc.h ziplist.o: ziplist.c zmalloc.h +intset.o: intset.c zmalloc.h zmalloc.o: zmalloc.c config.h redis-server: $(OBJ) diff --git a/intset.c b/intset.c index 1ee8263c..2532582e 100644 --- a/intset.c +++ b/intset.c @@ -60,8 +60,8 @@ static intset *intsetUpgrade(intset *is, uint8_t newenc, uint8_t extra, uint8_t * the value is not present in the intset and sets "pos" to the position * where "value" can be inserted. */ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { - int min = 0, max = is->length-1, mid; - int64_t cur; + int min = 0, max = is->length-1, mid = -1; + int64_t cur = -1; /* The value can never be found when the set is empty */ if (is->length == 0) { @@ -179,6 +179,21 @@ int64_t intsetRandom(intset *is) { return INTSET_GET(is,rand()%is->length); } +/* Sets the value to the value at the given position. When this position is + * out of range the function returns 0, when in range it returns 1. */ +uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) { + if (pos < is->length) { + *value = INTSET_GET(is,pos); + return 1; + } + return 0; +} + +/* Return intset length */ +uint32_t intsetLen(intset *is) { + return is->length; +} + #ifdef INTSET_TEST_MAIN #include diff --git a/intset.h b/intset.h index b90c44e0..25afc18d 100644 --- a/intset.h +++ b/intset.h @@ -13,5 +13,7 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success); intset *intsetRemove(intset *is, int64_t value, uint8_t *success); uint8_t intsetFind(intset *is, int64_t value); int64_t intsetRandom(intset *is); +uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value); +uint32_t intsetLen(intset *is); #endif // __INTSET_H diff --git a/redis.c b/redis.c index cb7bca8e..c1df3293 100644 --- a/redis.c +++ b/redis.c @@ -76,6 +76,7 @@ #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ #include "zipmap.h" /* Compact dictionary-alike data structure */ #include "ziplist.h" /* Compact list data structure */ +#include "intset.h" /* Compact integer set structure */ #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */ #include "release.h" /* Release and/or git repository information */ @@ -132,9 +133,10 @@ #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */ #define REDIS_ENCODING_LIST 4 /* Encoded as zipmap */ #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ +#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */ static char* strencoding[] = { - "raw", "int", "hashtable", "zipmap", "list", "ziplist" + "raw", "int", "hashtable", "zipmap", "list", "ziplist", "intset" }; /* Object types only used for dumping to disk */ @@ -651,6 +653,7 @@ static void call(redisClient *c, struct redisCommand *cmd); static void resetClient(redisClient *c); static void convertToRealHash(robj *o); static void listTypeConvert(robj *o, int enc); +static void setTypeConvert(robj *o, int enc); static int pubsubUnsubscribeAllChannels(redisClient *c, int notify); static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify); static void freePubsubPattern(void *p); @@ -3069,6 +3072,13 @@ static robj *createSetObject(void) { return o; } +static robj *createIntsetObject(void) { + intset *is = intsetNew(); + robj *o = createObject(REDIS_SET,is); + o->encoding = REDIS_ENCODING_INTSET; + return o; +} + static robj *createHashObject(void) { /* All the Hashes start as zipmaps. Will be automatically converted * into hash tables if there are enough elements or big elements @@ -3107,7 +3117,16 @@ static void freeListObject(robj *o) { } static void freeSetObject(robj *o) { - dictRelease((dict*) o->ptr); + switch (o->encoding) { + case REDIS_ENCODING_HT: + dictRelease((dict*) o->ptr); + break; + case REDIS_ENCODING_INTSET: + zfree(o->ptr); + break; + default: + redisPanic("Unknown set encoding type"); + } } static void freeZsetObject(robj *o) { @@ -3371,7 +3390,7 @@ static int getLongLongFromObject(robj *o, long long *target) { } } - *target = value; + if (target) *target = value; return REDIS_OK; } @@ -3789,17 +3808,29 @@ static int rdbSaveObject(FILE *fp, robj *o) { } } else if (o->type == REDIS_SET) { /* Save a set value */ - dict *set = o->ptr; - dictIterator *di = dictGetIterator(set); - dictEntry *de; - - if (rdbSaveLen(fp,dictSize(set)) == -1) return -1; - while((de = dictNext(di)) != NULL) { - robj *eleobj = dictGetEntryKey(de); + if (o->encoding == REDIS_ENCODING_HT) { + dict *set = o->ptr; + dictIterator *di = dictGetIterator(set); + dictEntry *de; - if (rdbSaveStringObject(fp,eleobj) == -1) return -1; + if (rdbSaveLen(fp,dictSize(set)) == -1) return -1; + while((de = dictNext(di)) != NULL) { + robj *eleobj = dictGetEntryKey(de); + if (rdbSaveStringObject(fp,eleobj) == -1) return -1; + } + dictReleaseIterator(di); + } else if (o->encoding == REDIS_ENCODING_INTSET) { + intset *is = o->ptr; + long long llval; + int i = 0; + + if (rdbSaveLen(fp,intsetLen(is)) == -1) return -1; + while(intsetGet(is,i++,&llval)) { + if (rdbSaveLongLongAsStringObject(fp,llval) == -1) return -1; + } + } else { + redisPanic("Unknown set encoding"); } - dictReleaseIterator(di); } else if (o->type == REDIS_ZSET) { /* Save a set value */ zset *zs = o->ptr; @@ -5459,12 +5490,37 @@ static void rpoplpushcommand(redisClient *c) { /* ==================================== Sets ================================ */ +/* Factory method to return a set that *can* hold "value". When the object has + * an integer-encodable value, an intset will be returned. Otherwise a regular + * hash table. */ +static robj *setTypeCreate(robj *value) { + if (getLongLongFromObject(value,NULL) == REDIS_OK) + return createIntsetObject(); + return createSetObject(); +} + static int setTypeAdd(robj *subject, robj *value) { + long long llval; if (subject->encoding == REDIS_ENCODING_HT) { if (dictAdd(subject->ptr,value,NULL) == DICT_OK) { incrRefCount(value); return 1; } + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + if (getLongLongFromObject(value,&llval) == REDIS_OK) { + uint8_t success; + subject->ptr = intsetAdd(subject->ptr,llval,&success); + if (success) return 1; + } else { + /* Failed to get integer from object, convert to regular set. */ + setTypeConvert(subject,REDIS_ENCODING_HT); + + /* The set *was* an intset and this value is not integer + * encodable, so dictAdd should always work. */ + redisAssert(dictAdd(subject->ptr,value,NULL) == DICT_OK); + incrRefCount(value); + return 1; + } } else { redisPanic("Unknown set encoding"); } @@ -5472,11 +5528,18 @@ static int setTypeAdd(robj *subject, robj *value) { } static int setTypeRemove(robj *subject, robj *value) { + long long llval; if (subject->encoding == REDIS_ENCODING_HT) { if (dictDelete(subject->ptr,value) == DICT_OK) { if (htNeedsResize(subject->ptr)) dictResize(subject->ptr); return 1; } + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + if (getLongLongFromObject(value,&llval) == REDIS_OK) { + uint8_t success; + subject->ptr = intsetRemove(subject->ptr,llval,&success); + if (success) return 1; + } } else { redisPanic("Unknown set encoding"); } @@ -5484,24 +5547,35 @@ static int setTypeRemove(robj *subject, robj *value) { } static int setTypeIsMember(robj *subject, robj *value) { + long long llval; if (subject->encoding == REDIS_ENCODING_HT) { return dictFind((dict*)subject->ptr,value) != NULL; + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + if (getLongLongFromObject(value,&llval) == REDIS_OK) { + return intsetFind((intset*)subject->ptr,llval); + } } else { redisPanic("Unknown set encoding"); } + return 0; } /* Structure to hold set iteration abstraction. */ typedef struct { + robj *subject; int encoding; + int ii; /* intset iterator */ dictIterator *di; } setIterator; static setIterator *setTypeInitIterator(robj *subject) { setIterator *si = zmalloc(sizeof(setIterator)); + si->subject = subject; si->encoding = subject->encoding; if (si->encoding == REDIS_ENCODING_HT) { si->di = dictGetIterator(subject->ptr); + } else if (si->encoding == REDIS_ENCODING_INTSET) { + si->ii = 0; } else { redisPanic("Unknown set encoding"); } @@ -5525,6 +5599,10 @@ static robj *setTypeNext(setIterator *si) { ret = dictGetEntryKey(de); incrRefCount(ret); } + } else if (si->encoding == REDIS_ENCODING_INTSET) { + long long llval; + if (intsetGet(si->subject->ptr,si->ii++,&llval)) + ret = createStringObjectFromLongLong(llval); } return ret; } @@ -5538,6 +5616,9 @@ robj *setTypeRandomElement(robj *subject) { dictEntry *de = dictGetRandomKey(subject->ptr); ret = dictGetEntryKey(de); incrRefCount(ret); + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + long long llval = intsetRandom(subject->ptr); + ret = createStringObjectFromLongLong(llval); } else { redisPanic("Unknown set encoding"); } @@ -5547,17 +5628,41 @@ robj *setTypeRandomElement(robj *subject) { static unsigned long setTypeSize(robj *subject) { if (subject->encoding == REDIS_ENCODING_HT) { return dictSize((dict*)subject->ptr); + } else if (subject->encoding == REDIS_ENCODING_INTSET) { + return intsetLen((intset*)subject->ptr); } else { redisPanic("Unknown set encoding"); } } +static void setTypeConvert(robj *subject, int enc) { + setIterator *si; + robj *element; + redisAssert(subject->type == REDIS_SET); + + if (enc == REDIS_ENCODING_HT) { + dict *d = dictCreate(&setDictType,NULL); + + /* setTypeGet returns a robj with incremented refcount */ + si = setTypeInitIterator(subject); + while ((element = setTypeNext(si)) != NULL) + redisAssert(dictAdd(d,element,NULL) == DICT_OK); + setTypeReleaseIterator(si); + + subject->encoding = REDIS_ENCODING_HT; + zfree(subject->ptr); + subject->ptr = d; + } else { + redisPanic("Unsupported set conversion"); + } +} + static void saddCommand(redisClient *c) { robj *set; set = lookupKeyWrite(c->db,c->argv[1]); if (set == NULL) { - set = createSetObject(); + set = setTypeCreate(c->argv[2]); dbAdd(c->db,c->argv[1],set); } else { if (set->type != REDIS_SET) { @@ -5616,7 +5721,7 @@ static void smoveCommand(redisClient *c) { server.dirty++; /* Add the element to the destination set */ if (!dstset) { - dstset = createSetObject(); + dstset = setTypeCreate(c->argv[3]); dbAdd(c->db,c->argv[2],dstset); } setTypeAdd(dstset,c->argv[3]); @@ -5724,7 +5829,7 @@ static void sinterGenericCommand(redisClient *c, robj **setkeys, unsigned long s } else { /* If we have a target key where to store the resulting set * create this key with an empty set inside */ - dstset = createSetObject(); + dstset = createIntsetObject(); } /* Iterate all the elements of the first (smallest) set, and test @@ -5802,7 +5907,7 @@ static void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, /* We need a temp set object to store our union. If the dstkey * is not NULL (that is, we are inside an SUNIONSTORE operation) then * this set object will be the resulting object to set into the target key*/ - dstset = createSetObject(); + dstset = createIntsetObject(); /* Iterate all the elements of all the sets, add every element a single * time to the result set */ diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 58ea2b5b..c4fd4d76 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -1,119 +1,185 @@ start_server {tags {"set"}} { - test {SADD, SCARD, SISMEMBER, SMEMBERS basics} { - r sadd myset foo - r sadd myset bar - list [r scard myset] [r sismember myset foo] \ - [r sismember myset bar] [r sismember myset bla] \ - [lsort [r smembers myset]] - } {2 1 1 0 {bar foo}} - - test {SADD adding the same element multiple times} { - r sadd myset foo - r sadd myset foo - r sadd myset foo - r scard myset - } {2} + proc create_set {key entries} { + r del $key + foreach entry $entries { r sadd $key $entry } + } + + test {SADD, SCARD, SISMEMBER, SMEMBERS basics - regular set} { + create_set myset {foo} + assert_encoding hashtable myset + assert_equal 1 [r sadd myset bar] + assert_equal 0 [r sadd myset bar] + assert_equal 2 [r scard myset] + assert_equal 1 [r sismember myset foo] + assert_equal 1 [r sismember myset bar] + assert_equal 0 [r sismember myset bla] + assert_equal {bar foo} [lsort [r smembers myset]] + } + + test {SADD, SCARD, SISMEMBER, SMEMBERS basics - intset} { + create_set myset {17} + assert_encoding intset myset + assert_equal 1 [r sadd myset 16] + assert_equal 0 [r sadd myset 16] + assert_equal 2 [r scard myset] + assert_equal 1 [r sismember myset 16] + assert_equal 1 [r sismember myset 17] + assert_equal 0 [r sismember myset 18] + assert_equal {16 17} [lsort [r smembers myset]] + } test {SADD against non set} { r lpush mylist foo - catch {r sadd mylist bar} err - format $err - } {ERR*kind*} - - test {SREM basics} { - r sadd myset ciao - r srem myset foo - lsort [r smembers myset] - } {bar ciao} - - test {Mass SADD and SINTER with two sets} { + assert_error ERR*kind* {r sadd mylist bar} + } + + test {SREM basics - regular set} { + create_set myset {foo bar ciao} + assert_encoding hashtable myset + assert_equal 0 [r srem myset qux] + assert_equal 1 [r srem myset foo] + assert_equal {bar ciao} [lsort [r smembers myset]] + } + + test {SREM basics - intset} { + create_set myset {3 4 5} + assert_encoding intset myset + assert_equal 0 [r srem myset 6] + assert_equal 1 [r srem myset 4] + assert_equal {3 5} [lsort [r smembers myset]] + } + + foreach {type} {hashtable intset} { + for {set i 1} {$i <= 5} {incr i} { + r del [format "set%d" $i] + } for {set i 0} {$i < 1000} {incr i} { r sadd set1 $i r sadd set2 [expr $i+995] } - lsort [r sinter set1 set2] - } {995 996 997 998 999} + foreach i {999 995 1000 2000} { + r sadd set3 $i + } + for {set i 5} {$i < 1000} {incr i} { + r sadd set4 $i + } + r sadd set5 0 + + # it is possible that a hashtable encoded only contains integers, + # because it is converted from an intset to a hashtable when a + # non-integer element is added and then removed. + if {$type eq "hashtable"} { + for {set i 1} {$i <= 5} {incr i} { + r sadd [format "set%d" $i] foo + r srem [format "set%d" $i] foo + } + } + + test "Generated sets must be encoded as $type" { + for {set i 1} {$i <= 5} {incr i} { + assert_encoding $type [format "set%d" $i] + } + } + + test "SINTER with two sets - $type" { + assert_equal {995 996 997 998 999} [lsort [r sinter set1 set2]] + } + + test "SINTERSTORE with two sets - $type" { + r sinterstore setres set1 set2 + assert_encoding intset setres + assert_equal {995 996 997 998 999} [lsort [r smembers setres]] + } + + test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" { + r debug reload + r sinterstore setres set1 set2 + assert_encoding intset setres + assert_equal {995 996 997 998 999} [lsort [r smembers setres]] + } + + test "SUNION with two sets - $type" { + set expected [lsort -uniq "[r smembers set1] [r smembers set2]"] + assert_equal $expected [lsort [r sunion set1 set2]] + } + + test "SUNIONSTORE with two sets - $type" { + r sunionstore setres set1 set2 + assert_encoding intset setres + set expected [lsort -uniq "[r smembers set1] [r smembers set2]"] + assert_equal $expected [lsort [r smembers setres]] + } + + test "SINTER against three sets - $type" { + assert_equal {995 999} [lsort [r sinter set1 set2 set3]] + } + + test "SINTERSTORE with three sets - $type" { + r sinterstore setres set1 set2 set3 + assert_equal {995 999} [r smembers setres] + } + + test "SUNION with non existing keys - $type" { + set expected [lsort -uniq "[r smembers set1] [r smembers set2]"] + assert_equal $expected [lsort [r sunion nokey1 set1 set2 nokey2]] + } + + test "SDIFF with two sets - $type" { + assert_equal {0 1 2 3 4} [lsort [r sdiff set1 set4]] + } - test {SUNION with two sets} { - lsort [r sunion set1 set2] - } [lsort -uniq "[r smembers set1] [r smembers set2]"] + test "SDIFF with three sets - $type" { + assert_equal {1 2 3 4} [lsort [r sdiff set1 set4 set5]] + } - test {SINTERSTORE with two sets} { - r sinterstore setres set1 set2 - lsort [r smembers setres] - } {995 996 997 998 999} + test "SDIFFSTORE with three sets - $type" { + r sdiffstore setres set1 set4 set5 + assert_encoding intset setres + assert_equal {1 2 3 4} [lsort [r smembers setres]] + } + } - test {SINTERSTORE with two sets, after a DEBUG RELOAD} { - r debug reload - r sinterstore setres set1 set2 - lsort [r smembers setres] - } {995 996 997 998 999} + test "SINTER against non-set should throw error" { + r set key1 x + assert_error "ERR*wrong kind*" {r sinter key1 noset} + } - test {SUNIONSTORE with two sets} { - r sunionstore setres set1 set2 - lsort [r smembers setres] - } [lsort -uniq "[r smembers set1] [r smembers set2]"] + test "SUNION against non-set should throw error" { + r set key1 x + assert_error "ERR*wrong kind*" {r sunion key1 noset} + } - test {SUNIONSTORE against non existing keys} { + test "SINTERSTORE against non existing keys should delete dstkey" { r set setres xxx - list [r sunionstore setres foo111 bar222] [r exists xxx] - } {0 0} - - test {SINTER against three sets} { - r sadd set3 999 - r sadd set3 995 - r sadd set3 1000 - r sadd set3 2000 - lsort [r sinter set1 set2 set3] - } {995 999} - - test {SINTERSTORE with three sets} { - r sinterstore setres set1 set2 set3 - lsort [r smembers setres] - } {995 999} - - test {SUNION with non existing keys} { - lsort [r sunion nokey1 set1 set2 nokey2] - } [lsort -uniq "[r smembers set1] [r smembers set2]"] - - test {SDIFF with two sets} { - for {set i 5} {$i < 1000} {incr i} { - r sadd set4 $i + assert_equal 0 [r sinterstore setres foo111 bar222] + assert_equal 0 [r exists setres] + } + + test "SUNIONSTORE against non existing keys should delete dstkey" { + r set setres xxx + assert_equal 0 [r sunionstore setres foo111 bar222] + assert_equal 0 [r exists setres] + } + + foreach {type contents} {hashtable {a b c} intset {1 2 3}} { + test "SPOP basics - $type" { + create_set myset $contents + assert_encoding $type myset + assert_equal $contents [lsort [list [r spop myset] [r spop myset] [r spop myset]]] + assert_equal 0 [r scard myset] } - lsort [r sdiff set1 set4] - } {0 1 2 3 4} - test {SDIFF with three sets} { - r sadd set5 0 - lsort [r sdiff set1 set4 set5] - } {1 2 3 4} - - test {SDIFFSTORE with three sets} { - r sdiffstore sres set1 set4 set5 - lsort [r smembers sres] - } {1 2 3 4} - - test {SPOP basics} { - r del myset - r sadd myset 1 - r sadd myset 2 - r sadd myset 3 - list [lsort [list [r spop myset] [r spop myset] [r spop myset]]] [r scard myset] - } {{1 2 3} 0} - - test {SRANDMEMBER} { - r del myset - r sadd myset a - r sadd myset b - r sadd myset c - unset -nocomplain myset - array set myset {} - for {set i 0} {$i < 100} {incr i} { - set myset([r srandmember myset]) 1 - } - lsort [array names myset] - } {a b c} - + test "SRANDMEMBER - $type" { + create_set myset $contents + unset -nocomplain myset + array set myset {} + for {set i 0} {$i < 100} {incr i} { + set myset([r srandmember myset]) 1 + } + assert_equal $contents [lsort [array names myset]] + } + } + test {SMOVE basics} { r sadd myset1 a r sadd myset1 b -- 2.47.2