]>
git.saurik.com Git - redis.git/blob - zipmap.c
05bf6d6d9983d64d4055d43379da1f626b6b642c
   1 /* String -> String Map data structure optimized for size. 
   2  * This file implements a data structure mapping strings to other strings 
   3  * implementing an O(n) lookup data structure designed to be very memory 
   6  * The Redis Hash type uses this data structure for hashes composed of a small 
   7  * number of elements, to switch to an hash table once a given number of 
  10  * Given that many times Redis Hashes are used to represent objects composed 
  11  * of few fields, this is a very big win in terms of used memory. 
  13  * -------------------------------------------------------------------------- 
  15  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
  16  * All rights reserved. 
  18  * Redistribution and use in source and binary forms, with or without 
  19  * modification, are permitted provided that the following conditions are met: 
  21  *   * Redistributions of source code must retain the above copyright notice, 
  22  *     this list of conditions and the following disclaimer. 
  23  *   * Redistributions in binary form must reproduce the above copyright 
  24  *     notice, this list of conditions and the following disclaimer in the 
  25  *     documentation and/or other materials provided with the distribution. 
  26  *   * Neither the name of Redis nor the names of its contributors may be used 
  27  *     to endorse or promote products derived from this software without 
  28  *     specific prior written permission. 
  30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  31  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  33  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  34  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  37  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  38  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  40  * POSSIBILITY OF SUCH DAMAGE. 
  43 /* Memory layout of a zipmap, for the map "foo" => "bar", "hello" => "world": 
  45  * <status><len>"foo"<len><free>"bar"<len>"hello"<len><free>"world" 
  47  * <status> is 1 byte status. Currently only 1 bit is used: if the least 
  48  * significant bit is set, it means the zipmap needs to be defragmented. 
  50  * <len> is the length of the following string (key or value). 
  51  * <len> lengths are encoded in a single value or in a 5 bytes value. 
  52  * If the first byte value (as an unsigned 8 bit value) is between 0 and 
  53  * 252, it's a single-byte length. If it is 253 then a four bytes unsigned 
  54  * integer follows (in the host byte ordering). A value fo 255 is used to 
  55  * signal the end of the hash. The special value 254 is used to mark 
  56  * empty space that can be used to add new key/value pairs. 
  58  * <free> is the number of free unused bytes 
  59  * after the string, resulting from modification of values associated to a 
  60  * key (for instance if "foo" is set to "bar', and later "foo" will be se to 
  61  * "hi", I'll have a free byte to use if the value will enlarge again later, 
  62  * or even in order to add a key/value pair if it fits. 
  64  * <free> is always an unsigned 8 bit number, because if after an 
  65  * update operation there are more than a few free bytes, they'll be converted 
  66  * into empty space prefixed by the special value 254. 
  68  * The most compact representation of the above two elements hash is actually: 
  70  * "\x00\x03foo\x03\x00bar\x05hello\x05\x00world\xff" 
  72  * Empty space is marked using a 254 bytes + a <len> (coded as already 
  73  * specified). The length includes the 254 bytes in the count and the 
  74  * space taken by the <len> field. So for instance removing the "foo" key 
  75  * from the zipmap above will lead to the following representation: 
  77  * "\x00\xfd\x10........\x05hello\x05\x00world\xff" 
  79  * Note that because empty space, keys, values, are all prefixed length 
  80  * "objects", the lookup will take O(N) where N is the numeber of elements 
  81  * in the zipmap and *not* the number of bytes needed to represent the zipmap. 
  82  * This lowers the constant times considerably. 
  90 #define ZIPMAP_BIGLEN 253 
  91 #define ZIPMAP_EMPTY 254 
  92 #define ZIPMAP_END 255 
  94 #define ZIPMAP_STATUS_FRAGMENTED 1 
  96 /* The following defines the max value for the <free> field described in the 
  97  * comments above, that is, the max number of trailing bytes in a value. */ 
  98 #define ZIPMAP_VALUE_MAX_FREE 5 
 100 /* The following macro returns the number of bytes needed to encode the length 
 101  * for the integer value _l, that is, 1 byte for lengths < ZIPMAP_BIGLEN and 
 102  * 5 bytes for all the other lengths. */ 
 103 #define ZIPMAP_LEN_BYTES(_l) (((_l) < ZIPMAP_BIGLEN) ? 1 : sizeof(unsigned int)+1) 
 105 /* Create a new empty zipmap. */ 
 106 unsigned char *zipmapNew(void) { 
 107     unsigned char *zm 
= zmalloc(2); 
 109     zm
[0] = 0; /* Status */ 
 114 /* Decode the encoded length pointed by 'p' */ 
 115 static unsigned int zipmapDecodeLength(unsigned char *p
) { 
 116     unsigned int len 
= *p
; 
 118     if (len 
< ZIPMAP_BIGLEN
) return len
; 
 119     memcpy(&len
,p
,sizeof(unsigned int)); 
 123 /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns 
 124  * the amount of bytes required to encode such a length. */ 
 125 static unsigned int zipmapEncodeLength(unsigned char *p
, unsigned int len
) { 
 127         return ZIPMAP_LEN_BYTES(len
); 
 129         if (len 
< ZIPMAP_BIGLEN
) { 
 133             p
[0] = ZIPMAP_BIGLEN
; 
 134             memcpy(p
+1,&len
,sizeof(len
)); 
 135             return 1+sizeof(len
); 
 140 /* Search for a matching key, returning a pointer to the entry inside the 
 141  * zipmap. Returns NULL if the key is not found. 
 143  * If NULL is returned, and totlen is not NULL, it is set to the entire 
 144  * size of the zimap, so that the calling function will be able to 
 145  * reallocate the original zipmap to make room for more entries. 
 147  * If NULL is returned, and freeoff and freelen are not NULL, they are set 
 148  * to the offset of the first empty space that can hold '*freelen' bytes 
 149  * (freelen is an integer pointer used both to signal the required length 
 150  * and to get the reply from the function). If there is not a suitable 
 151  * free space block to hold the requested bytes, *freelen is set to 0. */ 
 152 static unsigned char *zipmapLookupRaw(unsigned char *zm
, unsigned char *key
, unsigned int klen
, unsigned int *totlen
, unsigned int *freeoff
, unsigned int *freelen
) { 
 153     unsigned char *p 
= zm
+1; 
 155     unsigned int reqfreelen 
= 0; /* initialized just to prevent warning */ 
 158         reqfreelen 
= *freelen
; 
 160         assert(reqfreelen 
!= 0); 
 162     while(*p 
!= ZIPMAP_END
) { 
 163         if (*p 
== ZIPMAP_EMPTY
) { 
 164             l 
= zipmapDecodeLength(p
+1); 
 165             /* if the user want a free space report, and this space is 
 166              * enough, and we did't already found a suitable space... */ 
 167             if (freelen 
&& l 
>= reqfreelen 
&& *freelen 
== 0) { 
 172             zm
[0] |= ZIPMAP_STATUS_FRAGMENTED
; 
 176             /* Match or skip the key */ 
 177             l 
= zipmapDecodeLength(p
); 
 178             if (l 
== klen 
&& !memcmp(p
+1,key
,l
)) return p
; 
 179             p 
+= zipmapEncodeLength(NULL
,l
) + l
; 
 180             /* Skip the value as well */ 
 181             l 
= zipmapDecodeLength(p
); 
 182             p 
+= zipmapEncodeLength(NULL
,l
); 
 184             p 
+= l
+1+free
; /* +1 to skip the free byte */ 
 187     if (totlen 
!= NULL
) *totlen 
= (unsigned int)(p
-zm
)+1; 
 191 static unsigned long zipmapRequiredLength(unsigned int klen
, unsigned int vlen
) { 
 195     if (klen 
>= ZIPMAP_BIGLEN
) l 
+= 4; 
 196     if (vlen 
>= ZIPMAP_BIGLEN
) l 
+= 4; 
 200 /* Return the total amount used by a key (encoded length + payload) */ 
 201 static unsigned int zipmapRawKeyLength(unsigned char *p
) { 
 202     unsigned int l 
= zipmapDecodeLength(p
); 
 204     return zipmapEncodeLength(NULL
,l
) + l
; 
 207 /* Return the total amount used by a value 
 208  * (encoded length + single byte free count + payload) */ 
 209 static unsigned int zipmapRawValueLength(unsigned char *p
) { 
 210     unsigned int l 
= zipmapDecodeLength(p
); 
 213     used 
= zipmapEncodeLength(NULL
,l
); 
 214     used 
+= p
[used
] + 1 + l
; 
 218 /* If 'p' points to a key, this function returns the total amount of 
 219  * bytes used to store this entry (entry = key + associated value + trailing 
 220  * free space if any). */ 
 221 static unsigned int zipmapRawEntryLength(unsigned char *p
) { 
 222     unsigned int l 
= zipmapRawKeyLength(p
); 
 224     return l 
+ zipmapRawValueLength(p
+l
); 
 227 /* Set key to value, creating the key if it does not already exist. 
 228  * If 'update' is not NULL, *update is set to 1 if the key was 
 229  * already preset, otherwise to 0. */ 
 230 unsigned char *zipmapSet(unsigned char *zm
, unsigned char *key
, unsigned int klen
, unsigned char *val
, unsigned int vlen
, int *update
) { 
 231     unsigned int oldlen 
= 0, freeoff 
= 0, freelen
; 
 232     unsigned int reqlen 
= zipmapRequiredLength(klen
,vlen
); 
 233     unsigned int empty
, vempty
; 
 237     if (update
) *update 
= 0; 
 238     p 
= zipmapLookupRaw(zm
,key
,klen
,&oldlen
,&freeoff
,&freelen
); 
 239     if (p 
== NULL 
&& freelen 
== 0) { 
 240         /* Key not found, and not space for the new key. Enlarge */ 
 241         zm 
= zrealloc(zm
,oldlen
+reqlen
); 
 243         zm
[oldlen
+reqlen
-1] = ZIPMAP_END
; 
 245     } else if (p 
== NULL
) { 
 246         /* Key not found, but there is enough free space. */ 
 248         /* note: freelen is already set in this case */ 
 250         unsigned char *b 
= p
; 
 252         /* Key found. Is there enough space for the new value? */ 
 253         /* Compute the total length: */ 
 254         if (update
) *update 
= 1; 
 255         freelen 
= zipmapRawKeyLength(b
); 
 257         freelen 
+= zipmapRawValueLength(b
); 
 258         if (freelen 
< reqlen
) { 
 259             /* Mark this entry as free and recurse */ 
 261             zipmapEncodeLength(p
+1,freelen
); 
 262             zm
[0] |= ZIPMAP_STATUS_FRAGMENTED
; 
 263             return zipmapSet(zm
,key
,klen
,val
,vlen
,NULL
); 
 267     /* Ok we have a suitable block where to write the new key/value 
 269     empty 
= freelen
-reqlen
; 
 270     /* If there is too much free space mark it as a free block instead 
 271      * of adding it as trailing empty space for the value, as we want 
 272      * zipmaps to be very space efficient. */ 
 273     if (empty 
> ZIPMAP_VALUE_MAX_FREE
) { 
 278         zipmapEncodeLength(e
+1,empty
); 
 280         zm
[0] |= ZIPMAP_STATUS_FRAGMENTED
; 
 285     /* Just write the key + value and we are done. */ 
 287     p 
+= zipmapEncodeLength(p
,klen
); 
 291     p 
+= zipmapEncodeLength(p
,vlen
); 
 297 /* Remove the specified key. If 'deleted' is not NULL the pointed integer is 
 298  * set to 0 if the key was not found, to 1 if it was found and deleted. */ 
 299 unsigned char *zipmapDel(unsigned char *zm
, unsigned char *key
, unsigned int klen
, int *deleted
) { 
 300     unsigned char *p 
= zipmapLookupRaw(zm
,key
,klen
,NULL
,NULL
,NULL
); 
 302         unsigned int freelen 
= zipmapRawEntryLength(p
); 
 305         zipmapEncodeLength(p
+1,freelen
); 
 306         zm
[0] |= ZIPMAP_STATUS_FRAGMENTED
; 
 307         if (deleted
) *deleted 
= 1; 
 309         if (deleted
) *deleted 
= 0; 
 314 /* Call it before to iterate trought elements via zipmapNext() */ 
 315 unsigned char *zipmapRewind(unsigned char *zm
) { 
 319 /* This function is used to iterate through all the zipmap elements. 
 320  * In the first call the first argument is the pointer to the zipmap + 1. 
 321  * In the next calls what zipmapNext returns is used as first argument. 
 324  * unsigned char *i = zipmapRewind(my_zipmap); 
 325  * while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) { 
 326  *     printf("%d bytes key at $p\n", klen, key); 
 327  *     printf("%d bytes value at $p\n", vlen, value); 
 330 unsigned char *zipmapNext(unsigned char *zm
, unsigned char **key
, unsigned int *klen
, unsigned char **value
, unsigned int *vlen
) { 
 331     while(zm
[0] == ZIPMAP_EMPTY
) 
 332         zm 
+= zipmapDecodeLength(zm
+1); 
 333     if (zm
[0] == ZIPMAP_END
) return NULL
; 
 336         *klen 
= zipmapDecodeLength(zm
); 
 337         *key 
+= ZIPMAP_LEN_BYTES(*klen
); 
 339     zm 
+= zipmapRawKeyLength(zm
); 
 342         *vlen 
= zipmapDecodeLength(zm
); 
 343         *value 
+= ZIPMAP_LEN_BYTES(*vlen
); 
 345     zm 
+= zipmapRawValueLength(zm
); 
 349 /* Search a key and retrieve the pointer and len of the associated value. 
 350  * If the key is found the function returns 1, otherwise 0. */ 
 351 int zipmapGet(unsigned char *zm
, unsigned char *key
, unsigned int klen
, unsigned char **value
, unsigned int *vlen
) { 
 354     if ((p 
= zipmapLookupRaw(zm
,key
,klen
,NULL
,NULL
,NULL
)) == NULL
) return 0; 
 355     p 
+= zipmapRawKeyLength(p
); 
 356     *vlen 
= zipmapDecodeLength(p
); 
 357     *value 
= p 
+ ZIPMAP_LEN_BYTES(*vlen
) + 1; 
 361 /* Return 1 if the key exists, otherwise 0 is returned. */ 
 362 int zipmapExists(unsigned char *zm
, unsigned char *key
, unsigned int klen
) { 
 363     return zipmapLookupRaw(zm
,key
,klen
,NULL
,NULL
,NULL
) != NULL
; 
 366 void zipmapRepr(unsigned char *p
) { 
 369     printf("{status %u}",*p
++); 
 371         if (p
[0] == ZIPMAP_END
) { 
 374         } else if (p
[0] == ZIPMAP_EMPTY
) { 
 375             l 
= zipmapDecodeLength(p
+1); 
 376             printf("{%u empty block}", l
); 
 381             l 
= zipmapDecodeLength(p
); 
 382             printf("{key %u}",l
); 
 383             p 
+= zipmapEncodeLength(NULL
,l
); 
 384             fwrite(p
,l
,1,stdout
); 
 387             l 
= zipmapDecodeLength(p
); 
 388             printf("{value %u}",l
); 
 389             p 
+= zipmapEncodeLength(NULL
,l
); 
 391             fwrite(p
,l
,1,stdout
); 
 395                 while(e
--) printf("."); 
 403 #ifdef ZIPMAP_TEST_MAIN 
 408     zm 
= zipmapSet(zm
,(unsigned char*) "hello",5, (unsigned char*) "world!",6,NULL
); 
 409     zm 
= zipmapSet(zm
,(unsigned char*) "foo",3, (unsigned char*) "bar",3,NULL
); 
 410     zm 
= zipmapSet(zm
,(unsigned char*) "foo",3, (unsigned char*) "!",1,NULL
); 
 412     zm 
= zipmapSet(zm
,(unsigned char*) "foo",3, (unsigned char*) "12345",5,NULL
); 
 414     zm 
= zipmapSet(zm
,(unsigned char*) "new",3, (unsigned char*) "xx",2,NULL
); 
 415     zm 
= zipmapSet(zm
,(unsigned char*) "noval",5, (unsigned char*) "",0,NULL
); 
 417     zm 
= zipmapDel(zm
,(unsigned char*) "new",3,NULL
); 
 419     printf("\nPerform a direct lookup:\n"); 
 421         unsigned char *value
; 
 424         if (zipmapGet(zm
,(unsigned char*) "foo",3,&value
,&vlen
)) { 
 425             printf("  foo is associated to the %d bytes value: %.*s\n", 
 429     printf("\nIterate trought elements:\n"); 
 431         unsigned char *i 
= zipmapRewind(zm
); 
 432         unsigned char *key
, *value
; 
 433         unsigned int klen
, vlen
; 
 435         while((i 
= zipmapNext(i
,&key
,&klen
,&value
,&vlen
)) != NULL
) { 
 436             printf("  %d:%.*s => %d:%.*s\n", klen
, klen
, key
, vlen
, vlen
, value
);