From: Pieter Noordhuis Date: Sat, 22 May 2010 13:07:12 +0000 (+0200) Subject: initial work for integer encoding in ziplists X-Git-Url: https://git.saurik.com/redis.git/commitdiff_plain/29b14d5face8a434810794f524c8449581166adf initial work for integer encoding in ziplists --- diff --git a/zip.c b/zip.c index cef7b1a3..e8f01edb 100644 --- a/zip.c +++ b/zip.c @@ -1,41 +1,153 @@ #define ZIP_BIGLEN 254 #define ZIP_END 255 -/* The following macro returns the number of bytes needed to encode the length - * for the integer value _l, that is, 1 byte for lengths < ZIP_BIGLEN and - * 5 bytes for all the other lengths. */ -#define ZIP_LEN_BYTES(_l) (((_l) < ZIP_BIGLEN) ? 1 : sizeof(unsigned int)+1) +/* Entry encoding */ +#define ZIP_ENC_RAW 0 +#define ZIP_ENC_SHORT 1 +#define ZIP_ENC_INT 2 +#define ZIP_ENC_LLONG 3 +#define ZIP_ENCODING(p) ((p)[0] >> 6) -/* Decode the encoded length pointed by 'p' */ -static unsigned int zipDecodeLength(unsigned char *p) { - unsigned int len = *p; +/* Length encoding for raw entries */ +#define ZIP_LEN_INLINE 0 +#define ZIP_LEN_UINT16 1 +#define ZIP_LEN_UINT32 2 - if (len < ZIP_BIGLEN) return len; - memcpy(&len,p+1,sizeof(unsigned int)); +static unsigned int zipEncodingSize(char encoding) { + if (encoding == ZIP_ENC_SHORT) { + return sizeof(short int); + } else if (encoding == ZIP_ENC_INT) { + return sizeof(int); + } else if (encoding == ZIP_ENC_LLONG) { + return sizeof(long long); + } + assert(NULL); +} + +/* Decode the encoded length pointed by 'p'. If a pointer to 'lensize' is + * provided, it is set to the number of bytes required to encode the length. */ +static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) { + unsigned char encoding = ZIP_ENCODING(p), lenenc; + unsigned int len; + + if (encoding == ZIP_ENC_RAW) { + lenenc = (p[0] >> 4) & 0x3; + if (lenenc == ZIP_LEN_INLINE) { + len = p[0] & 0xf; + if (lensize) *lensize = 1; + } else if (lenenc == ZIP_LEN_UINT16) { + len = p[1] | (p[2] << 8); + if (lensize) *lensize = 3; + } else { + len = p[1] | (p[2] << 8) | (p[3] << 16) | (p[4] << 24); + if (lensize) *lensize = 5; + } + } else { + len = zipEncodingSize(encoding); + if (lensize) *lensize = 1; + } return len; } /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns * the amount of bytes required to encode such a length. */ -static unsigned int zipEncodeLength(unsigned char *p, unsigned int len) { - if (p == NULL) { - return ZIP_LEN_BYTES(len); - } else { - if (len < ZIP_BIGLEN) { - p[0] = len; - return 1; +static unsigned int zipEncodeLength(unsigned char *p, char encoding, unsigned int rawlen) { + unsigned char len = 1, lenenc, buf[5]; + if (encoding == ZIP_ENC_RAW) { + if (rawlen <= 0xf) { + if (!p) return len; + lenenc = ZIP_LEN_INLINE; + buf[0] = rawlen; + } else if (rawlen <= 0xffff) { + len += 2; + if (!p) return len; + lenenc = ZIP_LEN_UINT16; + buf[1] = (rawlen ) & 0xff; + buf[2] = (rawlen >> 8) & 0xff; } else { - p[0] = ZIP_BIGLEN; - memcpy(p+1,&len,sizeof(len)); - return 1+sizeof(len); + len += 4; + if (!p) return len; + lenenc = ZIP_LEN_UINT32; + buf[1] = (rawlen ) & 0xff; + buf[2] = (rawlen >> 8) & 0xff; + buf[3] = (rawlen >> 16) & 0xff; + buf[4] = (rawlen >> 24) & 0xff; } + buf[0] = (lenenc << 4) | (buf[0] & 0xf); + } + if (!p) return len; + + /* Apparently we need to store the length in 'p' */ + buf[0] = (encoding << 6) | (buf[0] & 0x3f); + memcpy(p,buf,len); + return len; +} + +/* Check if string pointed to by 'entry' can be encoded as an integer. + * Stores the integer value in 'v' and its encoding in 'encoding'. + * Warning: this function requires a NULL-terminated string! */ +static int zipTryEncoding(unsigned char *entry, long long *v, char *encoding) { + long long value; + char *eptr; + + if (entry[0] == '-' || (entry[0] >= '0' && entry[0] <= '9')) { + value = strtoll(entry,&eptr,10); + if (eptr[0] != '\0') return 0; + if (value >= SHRT_MIN && value <= SHRT_MAX) { + *encoding = ZIP_ENC_SHORT; + } else if (value >= INT_MIN && value <= INT_MAX) { + *encoding = ZIP_ENC_INT; + } else { + *encoding = ZIP_ENC_LLONG; + } + *v = value; + return 1; + } + return 0; +} + +static void zipSaveInteger(unsigned char *p, long long value, char encoding) { + short int s; + int i; + long long l; + if (encoding == ZIP_ENC_SHORT) { + s = value; + memcpy(p,&s,sizeof(s)); + } else if (encoding == ZIP_ENC_INT) { + i = value; + memcpy(p,&i,sizeof(i)); + } else if (encoding == ZIP_ENC_LLONG) { + l = value; + memcpy(p,&l,sizeof(l)); + } else { + assert(NULL); + } +} + +static long long zipLoadInteger(unsigned char *p, char encoding) { + short int s; + int i; + long long l, ret; + if (encoding == ZIP_ENC_SHORT) { + memcpy(&s,p,sizeof(s)); + ret = s; + } else if (encoding == ZIP_ENC_INT) { + memcpy(&i,p,sizeof(i)); + ret = i; + } else if (encoding == ZIP_ENC_LLONG) { + memcpy(&l,p,sizeof(l)); + ret = l; + } else { + assert(NULL); } + return ret; } /* Return the total amount used by an entry (encoded length + payload). */ static unsigned int zipRawEntryLength(unsigned char *p) { - unsigned int l = zipDecodeLength(p); - return zipEncodeLength(NULL,l) + l; + unsigned int lensize, len; + len = zipDecodeLength(p, &lensize); + return lensize + len; } /* Resize the zip* structure. */ diff --git a/ziplist.c b/ziplist.c index 596217d5..a45ae339 100644 --- a/ziplist.c +++ b/ziplist.c @@ -15,8 +15,10 @@ */ #include +#include #include #include +#include #include "zmalloc.h" #include "sds.h" #include "ziplist.h" @@ -60,13 +62,21 @@ static unsigned char *ziplistTail(unsigned char *zl) { } unsigned char *ziplistPush(unsigned char *zl, unsigned char *entry, unsigned int elen, int where) { - unsigned int curlen = ZIPLIST_BYTES(zl); - unsigned int reqlen = zipEncodeLength(NULL,elen)+elen; + unsigned int curlen = ZIPLIST_BYTES(zl), reqlen; unsigned char *p; + char encoding = ZIP_ENC_RAW; + long long value; - /* Resize the ziplist */ - zl = ziplistResize(zl,curlen+reqlen); + /* See if the entry can be encoded */ + if (zipTryEncoding(entry,&value,&encoding)) { + reqlen = zipEncodingSize(encoding); + } else { + reqlen = elen; + } + reqlen += zipEncodeLength(NULL,encoding,elen); + /* Resize the ziplist and move if needed */ + zl = ziplistResize(zl,curlen+reqlen); if (where == ZIPLIST_HEAD) { p = zl+ZIPLIST_HEADER_SIZE; if (*p != ZIP_END) { @@ -78,31 +88,44 @@ unsigned char *ziplistPush(unsigned char *zl, unsigned char *entry, unsigned int } /* Write the entry */ - p += zipEncodeLength(p,elen); - memcpy(p,entry,elen); + p += zipEncodeLength(p,encoding,elen); + if (encoding != ZIP_ENC_RAW) { + zipSaveInteger(p,value,encoding); + } else { + memcpy(p,entry,elen); + } ZIPLIST_INCR_LENGTH(zl,1); return zl; } -unsigned char *ziplistPop(unsigned char *zl, sds *value, int where) { - unsigned int curlen = ZIPLIST_BYTES(zl), len, rlen; +unsigned char *ziplistPop(unsigned char *zl, sds *target, int where) { + unsigned int curlen = ZIPLIST_BYTES(zl), rawlen; + unsigned int len, lensize; unsigned char *p; - if (value) *value = NULL; + long long value; + if (target) *target = NULL; /* Get pointer to element to remove */ p = (where == ZIPLIST_HEAD) ? ziplistHead(zl) : ziplistTail(zl); if (*p == ZIP_END) return zl; - len = zipDecodeLength(p); - if (value) *value = sdsnewlen(p+zipEncodeLength(NULL,len),len); + len = zipDecodeLength(p,&lensize); + if (target) { + if (ZIP_ENCODING(p) == ZIP_ENC_RAW) { + *target = sdsnewlen(p+lensize,len); + } else { + value = zipLoadInteger(p+lensize,ZIP_ENCODING(p)); + *target = sdscatprintf(sdsempty(), "%lld", value); + } + } /* Move list to front when popping from the head */ - rlen = zipRawEntryLength(p); + rawlen = lensize+len; if (where == ZIPLIST_HEAD) { - memmove(p,p+rlen,curlen-ZIPLIST_HEADER_SIZE-len); + memmove(p,p+rawlen,curlen-ZIPLIST_HEADER_SIZE-len); } /* Resize and update length */ - zl = ziplistResize(zl,curlen-rlen); + zl = ziplistResize(zl,curlen-rawlen); ZIPLIST_INCR_LENGTH(zl,-1); return zl; } @@ -121,10 +144,11 @@ unsigned char *ziplistIndex(unsigned char *zl, unsigned int index) { /* Store entry at current position in sds *value and return pointer * to the next entry. */ unsigned char *ziplistNext(unsigned char *p, unsigned char **q, unsigned char **entry, unsigned int *elen) { + unsigned int lensize; if (*p == ZIP_END) return NULL; if (entry) { - *elen = zipDecodeLength(p); - *entry = p+ZIP_LEN_BYTES(*elen); + *elen = zipDecodeLength(p,&lensize); + *entry = p+lensize; } if (q != NULL) *q = p; p += zipRawEntryLength(p); @@ -174,16 +198,22 @@ unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) { } void ziplistRepr(unsigned char *zl) { - unsigned char *p; - unsigned int l; + unsigned char *p, encoding; + unsigned int l, lsize; + long long value; - printf("{bytes %d} {length %u}\n",ZIPLIST_BYTES(zl), ZIPLIST_LENGTH(zl)); + printf("{total bytes %d} {length %u}\n",ZIPLIST_BYTES(zl), ZIPLIST_LENGTH(zl)); p = ziplistHead(zl); while(*p != ZIP_END) { - l = zipDecodeLength(p); - printf("{key %u}",l); - p += zipEncodeLength(NULL,l); - fwrite(p,l,1,stdout); + l = zipDecodeLength(p,&lsize); + printf("{header %u, payload %u} ",lsize,l); + encoding = ZIP_ENCODING(p); + p += lsize; + if (encoding == ZIP_ENC_RAW) { + fwrite(p,l,1,stdout); + } else { + printf("%lld", zipLoadInteger(p,encoding)); + } printf("\n"); p += l; } @@ -200,11 +230,33 @@ unsigned char *createList() { return zl; } +unsigned char *createIntList() { + unsigned char *zl = ziplistNew(); + char buf[32]; + + sprintf(buf, "100"); + zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL); + sprintf(buf, "128000"); + zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL); + sprintf(buf, "-100"); + zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_HEAD); + sprintf(buf, "4294967296"); + zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_HEAD); + sprintf(buf, "non integer"); + zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL); + sprintf(buf, "much much longer non integer"); + zl = ziplistPush(zl, buf, strlen(buf), ZIPLIST_TAIL); + return zl; +} + int main(int argc, char **argv) { unsigned char *zl, *p, *q, *entry; unsigned int elen; sds s; + zl = createIntList(); + ziplistRepr(zl); + zl = createList(); ziplistRepr(zl);