]>
git.saurik.com Git - redis.git/blob - src/zipmap.c
9f663fda0d4906be333c1973cbf07a0cba656126
   1 /* String -> String Map data structure optimized for size. 
   2  * This file implements a data structure mapping strings to other strings 
   3  * implementing an O(n) lookup data structure designed to be very memory 
   6  * The Redis Hash type uses this data structure for hashes composed of a small 
   7  * number of elements, to switch to an hash table once a given number of 
  10  * Given that many times Redis Hashes are used to represent objects composed 
  11  * of few fields, this is a very big win in terms of used memory. 
  13  * -------------------------------------------------------------------------- 
  15  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
  16  * All rights reserved. 
  18  * Redistribution and use in source and binary forms, with or without 
  19  * modification, are permitted provided that the following conditions are met: 
  21  *   * Redistributions of source code must retain the above copyright notice, 
  22  *     this list of conditions and the following disclaimer. 
  23  *   * Redistributions in binary form must reproduce the above copyright 
  24  *     notice, this list of conditions and the following disclaimer in the 
  25  *     documentation and/or other materials provided with the distribution. 
  26  *   * Neither the name of Redis nor the names of its contributors may be used 
  27  *     to endorse or promote products derived from this software without 
  28  *     specific prior written permission. 
  30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  31  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  33  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  34  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  37  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  38  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  40  * POSSIBILITY OF SUCH DAMAGE. 
  43 /* Memory layout of a zipmap, for the map "foo" => "bar", "hello" => "world": 
  45  * <zmlen><len>"foo"<len><free>"bar"<len>"hello"<len><free>"world" 
  47  * <zmlen> is 1 byte length that holds the current size of the zipmap. 
  48  * When the zipmap length is greater than or equal to 254, this value 
  49  * is not used and the zipmap needs to be traversed to find out the length. 
  51  * <len> is the length of the following string (key or value). 
  52  * <len> lengths are encoded in a single value or in a 5 bytes value. 
  53  * If the first byte value (as an unsigned 8 bit value) is between 0 and 
  54  * 252, it's a single-byte length. If it is 253 then a four bytes unsigned 
  55  * integer follows (in the host byte ordering). A value fo 255 is used to 
  56  * signal the end of the hash. The special value 254 is used to mark 
  57  * empty space that can be used to add new key/value pairs. 
  59  * <free> is the number of free unused bytes 
  60  * after the string, resulting from modification of values associated to a 
  61  * key (for instance if "foo" is set to "bar', and later "foo" will be se to 
  62  * "hi", I'll have a free byte to use if the value will enlarge again later, 
  63  * or even in order to add a key/value pair if it fits. 
  65  * <free> is always an unsigned 8 bit number, because if after an 
  66  * update operation there are more than a few free bytes, the zipmap will be 
  67  * reallocated to make sure it is as small as possible. 
  69  * The most compact representation of the above two elements hash is actually: 
  71  * "\x02\x03foo\x03\x00bar\x05hello\x05\x00world\xff" 
  73  * Note that because keys and values are prefixed length "objects", 
  74  * the lookup will take O(N) where N is the number of elements 
  75  * in the zipmap and *not* the number of bytes needed to represent the zipmap. 
  76  * This lowers the constant times considerably. 
  84 #define ZIPMAP_BIGLEN 254 
  85 #define ZIPMAP_END 255 
  87 /* The following defines the max value for the <free> field described in the 
  88  * comments above, that is, the max number of trailing bytes in a value. */ 
  89 #define ZIPMAP_VALUE_MAX_FREE 4 
  91 /* The following macro returns the number of bytes needed to encode the length 
  92  * for the integer value _l, that is, 1 byte for lengths < ZIPMAP_BIGLEN and 
  93  * 5 bytes for all the other lengths. */ 
  94 #define ZIPMAP_LEN_BYTES(_l) (((_l) < ZIPMAP_BIGLEN) ? 1 : sizeof(unsigned int)+1) 
  96 /* Create a new empty zipmap. */ 
  97 unsigned char *zipmapNew(void) { 
  98     unsigned char *zm 
= zmalloc(2); 
 100     zm
[0] = 0; /* Length */ 
 105 /* Decode the encoded length pointed by 'p' */ 
 106 static unsigned int zipmapDecodeLength(unsigned char *p
) { 
 107     unsigned int len 
= *p
; 
 109     if (len 
< ZIPMAP_BIGLEN
) return len
; 
 110     memcpy(&len
,p
+1,sizeof(unsigned int)); 
 114 /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns 
 115  * the amount of bytes required to encode such a length. */ 
 116 static unsigned int zipmapEncodeLength(unsigned char *p
, unsigned int len
) { 
 118         return ZIPMAP_LEN_BYTES(len
); 
 120         if (len 
< ZIPMAP_BIGLEN
) { 
 124             p
[0] = ZIPMAP_BIGLEN
; 
 125             memcpy(p
+1,&len
,sizeof(len
)); 
 126             return 1+sizeof(len
); 
 131 /* Search for a matching key, returning a pointer to the entry inside the 
 132  * zipmap. Returns NULL if the key is not found. 
 134  * If NULL is returned, and totlen is not NULL, it is set to the entire 
 135  * size of the zimap, so that the calling function will be able to 
 136  * reallocate the original zipmap to make room for more entries. */ 
 137 static unsigned char *zipmapLookupRaw(unsigned char *zm
, unsigned char *key
, unsigned int klen
, unsigned int *totlen
) { 
 138     unsigned char *p 
= zm
+1, *k 
= NULL
; 
 141     while(*p 
!= ZIPMAP_END
) { 
 144         /* Match or skip the key */ 
 145         l 
= zipmapDecodeLength(p
); 
 146         llen 
= zipmapEncodeLength(NULL
,l
); 
 147         if (k 
== NULL 
&& l 
== klen 
&& !memcmp(p
+llen
,key
,l
)) { 
 148             /* Only return when the user doesn't care 
 149              * for the total length of the zipmap. */ 
 150             if (totlen 
!= NULL
) { 
 157         /* Skip the value as well */ 
 158         l 
= zipmapDecodeLength(p
); 
 159         p 
+= zipmapEncodeLength(NULL
,l
); 
 161         p 
+= l
+1+free
; /* +1 to skip the free byte */ 
 163     if (totlen 
!= NULL
) *totlen 
= (unsigned int)(p
-zm
)+1; 
 167 static unsigned long zipmapRequiredLength(unsigned int klen
, unsigned int vlen
) { 
 171     if (klen 
>= ZIPMAP_BIGLEN
) l 
+= 4; 
 172     if (vlen 
>= ZIPMAP_BIGLEN
) l 
+= 4; 
 176 /* Return the total amount used by a key (encoded length + payload) */ 
 177 static unsigned int zipmapRawKeyLength(unsigned char *p
) { 
 178     unsigned int l 
= zipmapDecodeLength(p
); 
 179     return zipmapEncodeLength(NULL
,l
) + l
; 
 182 /* Return the total amount used by a value 
 183  * (encoded length + single byte free count + payload) */ 
 184 static unsigned int zipmapRawValueLength(unsigned char *p
) { 
 185     unsigned int l 
= zipmapDecodeLength(p
); 
 188     used 
= zipmapEncodeLength(NULL
,l
); 
 189     used 
+= p
[used
] + 1 + l
; 
 193 /* If 'p' points to a key, this function returns the total amount of 
 194  * bytes used to store this entry (entry = key + associated value + trailing 
 195  * free space if any). */ 
 196 static unsigned int zipmapRawEntryLength(unsigned char *p
) { 
 197     unsigned int l 
= zipmapRawKeyLength(p
); 
 198     return l 
+ zipmapRawValueLength(p
+l
); 
 201 static inline unsigned char *zipmapResize(unsigned char *zm
, unsigned int len
) { 
 202     zm 
= zrealloc(zm
, len
); 
 203     zm
[len
-1] = ZIPMAP_END
; 
 207 /* Set key to value, creating the key if it does not already exist. 
 208  * If 'update' is not NULL, *update is set to 1 if the key was 
 209  * already preset, otherwise to 0. */ 
 210 unsigned char *zipmapSet(unsigned char *zm
, unsigned char *key
, unsigned int klen
, unsigned char *val
, unsigned int vlen
, int *update
) { 
 211     unsigned int zmlen
, offset
; 
 212     unsigned int freelen
, reqlen 
= zipmapRequiredLength(klen
,vlen
); 
 213     unsigned int empty
, vempty
; 
 217     if (update
) *update 
= 0; 
 218     p 
= zipmapLookupRaw(zm
,key
,klen
,&zmlen
); 
 220         /* Key not found: enlarge */ 
 221         zm 
= zipmapResize(zm
, zmlen
+reqlen
); 
 223         zmlen 
= zmlen
+reqlen
; 
 225         /* Increase zipmap length (this is an insert) */ 
 226         if (zm
[0] < ZIPMAP_BIGLEN
) zm
[0]++; 
 228         /* Key found. Is there enough space for the new value? */ 
 229         /* Compute the total length: */ 
 230         if (update
) *update 
= 1; 
 231         freelen 
= zipmapRawEntryLength(p
); 
 232         if (freelen 
< reqlen
) { 
 233             /* Store the offset of this key within the current zipmap, so 
 234              * it can be resized. Then, move the tail backwards so this 
 235              * pair fits at the current position. */ 
 237             zm 
= zipmapResize(zm
, zmlen
-freelen
+reqlen
); 
 240             /* The +1 in the number of bytes to be moved is caused by the 
 241              * end-of-zipmap byte. Note: the *original* zmlen is used. */ 
 242             memmove(p
+reqlen
, p
+freelen
, zmlen
-(offset
+freelen
+1)); 
 243             zmlen 
= zmlen
-freelen
+reqlen
; 
 248     /* We now have a suitable block where the key/value entry can 
 249      * be written. If there is too much free space, move the tail 
 250      * of the zipmap a few bytes to the front and shrink the zipmap, 
 251      * as we want zipmaps to be very space efficient. */ 
 252     empty 
= freelen
-reqlen
; 
 253     if (empty 
>= ZIPMAP_VALUE_MAX_FREE
) { 
 254         /* First, move the tail <empty> bytes to the front, then resize 
 255          * the zipmap to be <empty> bytes smaller. */ 
 257         memmove(p
+reqlen
, p
+freelen
, zmlen
-(offset
+freelen
+1)); 
 259         zm 
= zipmapResize(zm
, zmlen
); 
 266     /* Just write the key + value and we are done. */ 
 268     p 
+= zipmapEncodeLength(p
,klen
); 
 272     p 
+= zipmapEncodeLength(p
,vlen
); 
 278 /* Remove the specified key. If 'deleted' is not NULL the pointed integer is 
 279  * set to 0 if the key was not found, to 1 if it was found and deleted. */ 
 280 unsigned char *zipmapDel(unsigned char *zm
, unsigned char *key
, unsigned int klen
, int *deleted
) { 
 281     unsigned int zmlen
, freelen
; 
 282     unsigned char *p 
= zipmapLookupRaw(zm
,key
,klen
,&zmlen
); 
 284         freelen 
= zipmapRawEntryLength(p
); 
 285         memmove(p
, p
+freelen
, zmlen
-((p
-zm
)+freelen
+1)); 
 286         zm 
= zipmapResize(zm
, zmlen
-freelen
); 
 288         /* Decrease zipmap length */ 
 289         if (zm
[0] < ZIPMAP_BIGLEN
) zm
[0]--; 
 291         if (deleted
) *deleted 
= 1; 
 293         if (deleted
) *deleted 
= 0; 
 298 /* Call it before to iterate trought elements via zipmapNext() */ 
 299 unsigned char *zipmapRewind(unsigned char *zm
) { 
 303 /* This function is used to iterate through all the zipmap elements. 
 304  * In the first call the first argument is the pointer to the zipmap + 1. 
 305  * In the next calls what zipmapNext returns is used as first argument. 
 308  * unsigned char *i = zipmapRewind(my_zipmap); 
 309  * while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) { 
 310  *     printf("%d bytes key at $p\n", klen, key); 
 311  *     printf("%d bytes value at $p\n", vlen, value); 
 314 unsigned char *zipmapNext(unsigned char *zm
, unsigned char **key
, unsigned int *klen
, unsigned char **value
, unsigned int *vlen
) { 
 315     if (zm
[0] == ZIPMAP_END
) return NULL
; 
 318         *klen 
= zipmapDecodeLength(zm
); 
 319         *key 
+= ZIPMAP_LEN_BYTES(*klen
); 
 321     zm 
+= zipmapRawKeyLength(zm
); 
 324         *vlen 
= zipmapDecodeLength(zm
); 
 325         *value 
+= ZIPMAP_LEN_BYTES(*vlen
); 
 327     zm 
+= zipmapRawValueLength(zm
); 
 331 /* Search a key and retrieve the pointer and len of the associated value. 
 332  * If the key is found the function returns 1, otherwise 0. */ 
 333 int zipmapGet(unsigned char *zm
, unsigned char *key
, unsigned int klen
, unsigned char **value
, unsigned int *vlen
) { 
 336     if ((p 
= zipmapLookupRaw(zm
,key
,klen
,NULL
)) == NULL
) return 0; 
 337     p 
+= zipmapRawKeyLength(p
); 
 338     *vlen 
= zipmapDecodeLength(p
); 
 339     *value 
= p 
+ ZIPMAP_LEN_BYTES(*vlen
) + 1; 
 343 /* Return 1 if the key exists, otherwise 0 is returned. */ 
 344 int zipmapExists(unsigned char *zm
, unsigned char *key
, unsigned int klen
) { 
 345     return zipmapLookupRaw(zm
,key
,klen
,NULL
) != NULL
; 
 348 /* Return the number of entries inside a zipmap */ 
 349 unsigned int zipmapLen(unsigned char *zm
) { 
 350     unsigned int len 
= 0; 
 351     if (zm
[0] < ZIPMAP_BIGLEN
) { 
 354         unsigned char *p 
= zipmapRewind(zm
); 
 355         while((p 
= zipmapNext(p
,NULL
,NULL
,NULL
,NULL
)) != NULL
) len
++; 
 357         /* Re-store length if small enough */ 
 358         if (len 
< ZIPMAP_BIGLEN
) zm
[0] = len
; 
 363 /* Return the raw size in bytes of a zipmap, so that we can serialize 
 364  * the zipmap on disk (or everywhere is needed) just writing the returned 
 365  * amount of bytes of the C array starting at the zipmap pointer. */ 
 366 size_t zipmapBlobLen(unsigned char *zm
) { 
 367     unsigned char *p 
= zipmapRewind(zm
); 
 368     unsigned char *old 
= p
; 
 369     while((p 
= zipmapNext(p
,NULL
,NULL
,NULL
,NULL
)) != NULL
) { 
 375 void zipmapRepr(unsigned char *p
) { 
 378     printf("{status %u}",*p
++); 
 380         if (p
[0] == ZIPMAP_END
) { 
 386             l 
= zipmapDecodeLength(p
); 
 387             printf("{key %u}",l
); 
 388             p 
+= zipmapEncodeLength(NULL
,l
); 
 389             if (l 
!= 0 && fwrite(p
,l
,1,stdout
) == 0) perror("fwrite"); 
 392             l 
= zipmapDecodeLength(p
); 
 393             printf("{value %u}",l
); 
 394             p 
+= zipmapEncodeLength(NULL
,l
); 
 396             if (l 
!= 0 && fwrite(p
,l
,1,stdout
) == 0) perror("fwrite"); 
 400                 while(e
--) printf("."); 
 408 #ifdef ZIPMAP_TEST_MAIN 
 414     zm 
= zipmapSet(zm
,(unsigned char*) "name",4, (unsigned char*) "foo",3,NULL
); 
 415     zm 
= zipmapSet(zm
,(unsigned char*) "surname",7, (unsigned char*) "foo",3,NULL
); 
 416     zm 
= zipmapSet(zm
,(unsigned char*) "age",3, (unsigned char*) "foo",3,NULL
); 
 419     zm 
= zipmapSet(zm
,(unsigned char*) "hello",5, (unsigned char*) "world!",6,NULL
); 
 420     zm 
= zipmapSet(zm
,(unsigned char*) "foo",3, (unsigned char*) "bar",3,NULL
); 
 421     zm 
= zipmapSet(zm
,(unsigned char*) "foo",3, (unsigned char*) "!",1,NULL
); 
 423     zm 
= zipmapSet(zm
,(unsigned char*) "foo",3, (unsigned char*) "12345",5,NULL
); 
 425     zm 
= zipmapSet(zm
,(unsigned char*) "new",3, (unsigned char*) "xx",2,NULL
); 
 426     zm 
= zipmapSet(zm
,(unsigned char*) "noval",5, (unsigned char*) "",0,NULL
); 
 428     zm 
= zipmapDel(zm
,(unsigned char*) "new",3,NULL
); 
 431     printf("\nLook up large key:\n"); 
 433         unsigned char buf
[512]; 
 434         unsigned char *value
; 
 435         unsigned int vlen
, i
; 
 436         for (i 
= 0; i 
< 512; i
++) buf
[i
] = 'a'; 
 438         zm 
= zipmapSet(zm
,buf
,512,(unsigned char*) "long",4,NULL
); 
 439         if (zipmapGet(zm
,buf
,512,&value
,&vlen
)) { 
 440             printf("  <long key> is associated to the %d bytes value: %.*s\n", 
 445     printf("\nPerform a direct lookup:\n"); 
 447         unsigned char *value
; 
 450         if (zipmapGet(zm
,(unsigned char*) "foo",3,&value
,&vlen
)) { 
 451             printf("  foo is associated to the %d bytes value: %.*s\n", 
 455     printf("\nIterate trought elements:\n"); 
 457         unsigned char *i 
= zipmapRewind(zm
); 
 458         unsigned char *key
, *value
; 
 459         unsigned int klen
, vlen
; 
 461         while((i 
= zipmapNext(i
,&key
,&klen
,&value
,&vlen
)) != NULL
) { 
 462             printf("  %d:%.*s => %d:%.*s\n", klen
, klen
, key
, vlen
, vlen
, value
);