From: antirez Date: Thu, 13 May 2010 11:36:42 +0000 (+0200) Subject: very strong speedup in saving time performance when there are many integers in the... X-Git-Url: https://git.saurik.com/redis.git/commitdiff_plain/32a6651355cc75f19f40b7d7a4cbd419ea19f39f very strong speedup in saving time performance when there are many integers in the dataset. Instead of decoding the object before to pass them to the rdbSaveObject layer we check asap if the object is integer encoded and can be written on disk as an integer. --- diff --git a/TODO b/TODO index bdbe7974..5ca3cada 100644 --- a/TODO +++ b/TODO @@ -1,29 +1,18 @@ Redis TODO and Roadmap -VERSION 2.0 TODO +VERSION 2.2 TODO ================ * BRPOPLPUSH -* List ops like L/RPUSH L/RPOP should return the new list length. * Save dataset / fsync() on SIGTERM -* MULTI/EXEC should support the "EXEC FSYNC" form? -* BLPOP & C. tests (write a non blocking Tcl client as first step) -* Once ZRANK is implemented, change the implementation of ZCOUNT to use the augmented skiplist in order to be much faster. +* Change the implementation of ZCOUNT to use the augmented skiplist in order to be much faster. * Write doc for ZCOUNT, and for open / closed intervals of sorted sets range operations. Virtual Memory sub-TODO: -* Check if the page selection algorithm is working well -* Divide swappability of objects by refcount * Use multiple open FDs against the VM file, one for thread. -* EXISTS should avoid loading the object if possible without making the code too specialized. * vm-min-age option -* Make sure objects loaded from the VM are specially encoded when possible. * Check what happens performance-wise if instead to create threads again and again the same threads are reused forever. Note: this requires a way to disable this clients in the child, but waiting for empty new jobs queue can be enough. -* Sets of integers are slow to load, for a number of reasons. Fix it. (use slow_sets.rdb file for debugging). (p.s. 
this was now partially fixed). -* On EXEC try to block the client until relevant keys are loaded. -* Hashes (GET/SET/DEL/INCRBY/EXISTS/FIELDS/LEN/MSET/MGET). Special encoding for hashes with less than N elements. -* Write documentation for APPEND * Implement LEN, PEEK, POKE, SETBIT, GETBIT VERSION 2.2 TODO (Fault tolerant sharding) diff --git a/redis.c b/redis.c index 0c411268..155af351 100644 --- a/redis.c +++ b/redis.c @@ -3379,22 +3379,12 @@ static int rdbSaveLen(FILE *fp, uint32_t len) { return 0; } -/* String objects in the form "2391" "-100" without any space and with a - * range of values that can fit in an 8, 16 or 32 bit signed value can be - * encoded as integers to save space */ -static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) { - long long value; - char *endptr, buf[32]; - - /* Check if it's possible to encode this value as a number */ - value = strtoll(s, &endptr, 10); - if (endptr[0] != '\0') return 0; - ll2string(buf,32,value); - - /* If the number converted back into a string is not identical - * then it's not possible to encode the string as integer */ - if (strlen(buf) != len || memcmp(buf,s,len)) return 0; - +/* Encode 'value' as an integer if possible (if integer will fit the + * supported range). If the function successfully encoded the integer + * then the (up to 5 bytes) encoded representation is written in the + * string pointed by 'enc' and the length is returned. Otherwise + * 0 is returned. 
*/ +static int rdbEncodeInteger(long long value, unsigned char *enc) { /* Finally check if it fits in our ranges */ if (value >= -(1<<7) && value <= (1<<7)-1) { enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8; @@ -3417,6 +3407,25 @@ static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) { } } +/* String objects in the form "2391" "-100" without any space and with a + * range of values that can fit in an 8, 16 or 32 bit signed value can be + * encoded as integers to save space */ +static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) { + long long value; + char *endptr, buf[32]; + + /* Check if it's possible to encode this value as a number */ + value = strtoll(s, &endptr, 10); + if (endptr[0] != '\0') return 0; + ll2string(buf,32,value); + + /* If the number converted back into a string is not identical + * then it's not possible to encode the string as integer */ + if (strlen(buf) != len || memcmp(buf,s,len)) return 0; + + return rdbEncodeInteger(value,enc); +} + static int rdbSaveLzfStringObject(FILE *fp, unsigned char *s, size_t len) { size_t comprlen, outlen; unsigned char byte; @@ -3480,6 +3489,21 @@ static int rdbSaveRawString(FILE *fp, unsigned char *s, size_t len) { static int rdbSaveStringObject(FILE *fp, robj *obj) { int retval; + /* Avoid to decode the object, then encode it again, if the + * object is already integer encoded. */ + if (obj->encoding == REDIS_ENCODING_INT) { + long val = (long) obj->ptr; + unsigned char buf[5]; + int enclen; + + if ((enclen = rdbEncodeInteger(val,buf)) > 0) { + if (fwrite(buf,enclen,1,fp) == 0) return -1; + return 0; + } + /* otherwise... fall through and continue with the usual + * code path. */ + } + /* Avoid incr/decr ref count business when possible. * This plays well with copy-on-write given that we are probably * in a child process (BGSAVE). Also this makes sure key objects