-/* Memory layout of a ziplist, containing "foo", "bar", "quux":
- * <zlbytes><zllen><len>"foo"<len>"bar"<len>"quux"
+/* The ziplist is a specially encoded dually linked list that is designed
+ * to be very memory efficient. It stores both strings and integer values,
+ * where integers are encoded as actual integers instead of a series of
+ * characters. It allows push and pop operations on either side of the list
+ * in O(1) time. However, because every operation requires a reallocation of
+ * the memory used by the ziplist, the actual complexity is related to the
+ * amount of memory used by the ziplist.
*
- * <zlbytes> is an unsigned integer to hold the number of bytes that
- * the ziplist occupies. This is stored to not have to traverse the ziplist
- * to know the new length when pushing.
+ * ----------------------------------------------------------------------------
*
- * <zllen> is the number of items in the ziplist. When this value is
- * greater than 254, we need to traverse the entire list to know
- * how many items it holds.
+ * ZIPLIST OVERALL LAYOUT:
+ * The general layout of the ziplist is as follows:
+ * <zlbytes><zltail><zllen><entry><entry><zlend>
*
- * <len> is the number of bytes occupied by a single entry. When this
- * number is greater than 253, the length will occupy 5 bytes, where
- * the extra bytes contain an unsigned integer to hold the length.
+ * <zlbytes> is an unsigned integer to hold the number of bytes that the
+ * ziplist occupies. This value needs to be stored to be able to resize the
+ * entire structure without the need to traverse it first.
+ *
+ * <zltail> is the offset to the last entry in the list. This allows a pop
+ * operation on the far side of the list without the need for full traversal.
+ *
+ * <zllen> is the number of entries. When this value is larger than 2**16-2,
+ * we need to traverse the entire list to know how many items it holds.
+ *
+ * <zlend> is a single byte special value, equal to 255, which indicates the
+ * end of the list.
+ *
+ * ZIPLIST ENTRIES:
+ * Every entry in the ziplist is prefixed by a header that contains two pieces
+ * of information. First, the length of the previous entry is stored to be
+ * able to traverse the list from back to front. Second, the encoding with an
+ * optional string length of the entry itself is stored.
+ *
+ * The length of the previous entry is encoded in the following way:
+ * If this length is smaller than 254 bytes, it will only consume a single
+ * byte that takes the length as value. When the length is greater than or
+ * equal to 254, it will consume 5 bytes. The first byte is set to 254 to
+ * indicate a larger value is following. The remaining 4 bytes take the
+ * length of the previous entry as value.
+ *
+ * The other header field of the entry itself depends on the contents of the
+ * entry. When the entry is a string, the first 2 bits of this header will hold
+ * the type of encoding used to store the length of the string, followed by the
+ * actual length of the string. When the entry is an integer the first 2 bits
+ * are both set to 1. The following 2 bits are used to specify what kind of
+ * integer will be stored after this header. An overview of the different
+ * types and encodings is as follows:
+ *
+ * |00pppppp| - 1 byte
+ * String value with length less than or equal to 63 bytes (6 bits).
+ * |01pppppp|qqqqqqqq| - 2 bytes
+ * String value with length less than or equal to 16383 bytes (14 bits).
+ * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
+ * String value with length greater than or equal to 16384 bytes.
+ * |11000000| - 1 byte
+ * Integer encoded as int16_t (2 bytes).
+ * |11010000| - 1 byte
+ * Integer encoded as int32_t (4 bytes).
+ * |11100000| - 1 byte
+ * Integer encoded as int64_t (8 bytes).
+ * |11110000| - 1 byte
+ * Integer encoded as 24 bit signed (3 bytes).
+ * |11111110| - 1 byte
+ * Integer encoded as 8 bit signed (1 byte).
+ * |1111xxxx| - (with xxxx between 0001 and 1101) immediate 4 bit integer.
+ * Unsigned integer from 0 to 12. The encoded value is actually from
+ * 1 to 13 because 0000 and 1111 can not be used, so 1 should be
+ * subtracted from the encoded 4 bit value to obtain the right value.
+ * |11111111| - End of ziplist.
+ *
+ * All the integers are represented in little endian byte order.
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include "zmalloc.h"
+#include "util.h"
#include "ziplist.h"
+#include "endianconv.h"
-/* Important note: the ZIP_END value is used to depict the end of the
- * ziplist structure. When a pointer contains an entry, the first couple
- * of bytes contain the encoded length of the previous entry. This length
- * is encoded as ZIP_ENC_RAW length, so the first two bits will contain 00
- * and the byte will therefore never have a value of 255. */
#define ZIP_END 255
#define ZIP_BIGLEN 254
-/* Entry encoding */
-#define ZIP_ENC_RAW 0
-#define ZIP_ENC_INT16 1
-#define ZIP_ENC_INT32 2
-#define ZIP_ENC_INT64 3
-#define ZIP_ENCODING(p) ((p)[0] >> 6)
-
-/* Length encoding for raw entries */
-#define ZIP_LEN_INLINE 0
-#define ZIP_LEN_UINT16 1
-#define ZIP_LEN_UINT32 2
+/* Different encoding/length possibilities. The two high bits of an entry's
+ * encoding byte select a string encoding (00, 01, 10) or an integer
+ * encoding (11); for integers the next two bits select the integer width. */
+#define ZIP_STR_MASK 0xc0
+#define ZIP_INT_MASK 0x30
+#define ZIP_STR_06B (0 << 6) /* string, length stored in 6 bits */
+#define ZIP_STR_14B (1 << 6) /* string, length stored in 14 bits */
+#define ZIP_STR_32B (2 << 6) /* string, length stored in the next 4 bytes */
+#define ZIP_INT_16B (0xc0 | 0<<4) /* integer, 2 byte payload */
+#define ZIP_INT_32B (0xc0 | 1<<4) /* integer, 4 byte payload */
+#define ZIP_INT_64B (0xc0 | 2<<4) /* integer, 8 byte payload */
+#define ZIP_INT_24B (0xc0 | 3<<4) /* integer, 3 byte payload */
+#define ZIP_INT_8B 0xfe /* integer, 1 byte payload */
+/* 4 bit integer immediate encoding: encodings ZIP_INT_IMM_MIN through
+ * ZIP_INT_IMM_MAX carry the value in their low nibble and have no payload. */
+#define ZIP_INT_IMM_MASK 0x0f
+#define ZIP_INT_IMM_MIN 0xf1 /* 11110001 */
+#define ZIP_INT_IMM_MAX 0xfd /* 11111101 */
+/* The argument is parenthesized so that expressions such as 'a | b' are
+ * masked as a whole rather than being split by operator precedence. */
+#define ZIP_INT_IMM_VAL(v) ((v) & ZIP_INT_IMM_MASK)
+
+#define INT24_MAX 0x7fffff
+#define INT24_MIN (-INT24_MAX - 1)
+
+/* Macro to determine type: true for every encoding whose two high bits are
+ * not both set, i.e. for the string encodings. */
+#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK)
/* Utility macros */
#define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl)))
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)
-#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+ZIPLIST_TAIL_OFFSET(zl))
-#define ZIPLIST_ENTRY_END(zl) ((zl)+ZIPLIST_BYTES(zl)-1)
+#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))
+#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)
/* We know a positive increment can only be 1 because entries can only be
* pushed one at a time. */
#define ZIPLIST_INCR_LENGTH(zl,incr) { \
- if (ZIPLIST_LENGTH(zl) < UINT16_MAX) ZIPLIST_LENGTH(zl)+=incr; }
+    /* A stored UINT16_MAX is left untouched; ziplistLen() recounts then. */ \
+    /* 'incr' is parenthesized so any expression is added as one operand. */ \
+    if (ZIPLIST_LENGTH(zl) < UINT16_MAX) \
+        ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+(incr)); \
+}
typedef struct zlentry {
unsigned int prevrawlensize, prevrawlen;
unsigned char *p;
} zlentry;
+/* Extract the encoding from the byte pointed by 'ptr' and set it into
+ * 'encoding'. For string encodings (value below ZIP_STR_MASK) only the two
+ * type bits are kept and the length bits are masked off; integer encodings
+ * are stored unmodified. 'ptr' is parenthesized before indexing so that a
+ * pointer expression such as 'p + n' can be passed safely. */
+#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \
+    (encoding) = ((ptr)[0]); \
+    if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
+} while(0)
+
/* Return bytes needed to store integer encoded by 'encoding' */
-static unsigned int zipEncodingSize(unsigned char encoding) {
- if (encoding == ZIP_ENC_INT16) {
- return sizeof(int16_t);
- } else if (encoding == ZIP_ENC_INT32) {
- return sizeof(int32_t);
- } else if (encoding == ZIP_ENC_INT64) {
- return sizeof(int64_t);
+static unsigned int zipIntSize(unsigned char encoding) {
+ switch(encoding) {
+ case ZIP_INT_8B: return 1;
+ case ZIP_INT_16B: return 2;
+ case ZIP_INT_24B: return 3;
+ case ZIP_INT_32B: return 4;
+ case ZIP_INT_64B: return 8;
+ default: return 0; /* 4 bit immediate */
}
assert(NULL);
-}
-
-/* Decode the encoded length pointed by 'p'. If a pointer to 'lensize' is
- * provided, it is set to the number of bytes required to encode the length. */
-static unsigned int zipDecodeLength(unsigned char *p, unsigned int *lensize) {
- unsigned char encoding = ZIP_ENCODING(p), lenenc;
- unsigned int len;
-
- if (encoding == ZIP_ENC_RAW) {
- lenenc = (p[0] >> 4) & 0x3;
- if (lenenc == ZIP_LEN_INLINE) {
- len = p[0] & 0xf;
- if (lensize) *lensize = 1;
- } else if (lenenc == ZIP_LEN_UINT16) {
- len = p[1] | (p[2] << 8);
- if (lensize) *lensize = 3;
- } else {
- len = p[1] | (p[2] << 8) | (p[3] << 16) | (p[4] << 24);
- if (lensize) *lensize = 5;
- }
- } else {
- len = zipEncodingSize(encoding);
- if (lensize) *lensize = 1;
- }
- return len;
+ return 0;
}
/* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
* the amount of bytes required to encode such a length. */
-static unsigned int zipEncodeLength(unsigned char *p, char encoding, unsigned int rawlen) {
- unsigned char len = 1, lenenc, buf[5];
- if (encoding == ZIP_ENC_RAW) {
- if (rawlen <= 0xf) {
+static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
+ unsigned char len = 1, buf[5];
+
+ if (ZIP_IS_STR(encoding)) {
+ /* Although encoding is given it may not be set for strings,
+ * so we determine it here using the raw length. */
+ if (rawlen <= 0x3f) {
if (!p) return len;
- lenenc = ZIP_LEN_INLINE;
- buf[0] = rawlen;
- } else if (rawlen <= 0xffff) {
- len += 2;
+ buf[0] = ZIP_STR_06B | rawlen;
+ } else if (rawlen <= 0x3fff) {
+ len += 1;
if (!p) return len;
- lenenc = ZIP_LEN_UINT16;
- buf[1] = (rawlen ) & 0xff;
- buf[2] = (rawlen >> 8) & 0xff;
+ buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
+ buf[1] = rawlen & 0xff;
} else {
len += 4;
if (!p) return len;
- lenenc = ZIP_LEN_UINT32;
- buf[1] = (rawlen ) & 0xff;
- buf[2] = (rawlen >> 8) & 0xff;
- buf[3] = (rawlen >> 16) & 0xff;
- buf[4] = (rawlen >> 24) & 0xff;
+ buf[0] = ZIP_STR_32B;
+ buf[1] = (rawlen >> 24) & 0xff;
+ buf[2] = (rawlen >> 16) & 0xff;
+ buf[3] = (rawlen >> 8) & 0xff;
+ buf[4] = rawlen & 0xff;
}
- buf[0] = (lenenc << 4) | (buf[0] & 0xf);
+ } else {
+ /* Implies integer encoding, so length is always 1. */
+ if (!p) return len;
+ buf[0] = encoding;
}
- if (!p) return len;
- /* Apparently we need to store the length in 'p' */
- buf[0] = (encoding << 6) | (buf[0] & 0x3f);
+ /* Store this length at p */
memcpy(p,buf,len);
return len;
}
-/* Decode the length of the previous element stored at "p". */
-static unsigned int zipPrevDecodeLength(unsigned char *p, unsigned int *lensize) {
- unsigned int len = *p;
- if (len < ZIP_BIGLEN) {
- if (lensize) *lensize = 1;
- } else {
- if (lensize) *lensize = 1+sizeof(len);
- memcpy(&len,p+1,sizeof(len));
- }
- return len;
-}
+/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the
+ * entries encoding, the 'lensize' variable will hold the number of bytes
+ * required to encode the entries length, and the 'len' variable will hold the
+ * entries length.
+ *
+ * For integer encodings 'lensize' is 1 and 'len' is the payload size
+ * reported by zipIntSize(). The macro expands to a single statement without
+ * a trailing semicolon, so it must be invoked as 'ZIP_DECODE_LENGTH(...);'.
+ * Arguments are evaluated more than once. */
+#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \
+    ZIP_ENTRY_ENCODING((ptr), (encoding)); \
+    if ((encoding) < ZIP_STR_MASK) { \
+        if ((encoding) == ZIP_STR_06B) { \
+            (lensize) = 1; \
+            (len) = (ptr)[0] & 0x3f; \
+        } else if ((encoding) == ZIP_STR_14B) { \
+            (lensize) = 2; \
+            (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \
+        } else if ((encoding) == ZIP_STR_32B) { \
+            (lensize) = 5; \
+            /* Shift as unsigned: a promoted int would overflow into the \
+             * sign bit when the top length byte is >= 0x80. */ \
+            (len) = ((unsigned int)(ptr)[1] << 24) | \
+                    ((unsigned int)(ptr)[2] << 16) | \
+                    ((unsigned int)(ptr)[3] << 8) | \
+                    ((unsigned int)(ptr)[4]); \
+        } else { \
+            assert(NULL); \
+        } \
+    } else { \
+        (lensize) = 1; \
+        (len) = zipIntSize(encoding); \
+    } \
+} while(0)
/* Encode the length of the previous entry and write it to "p". Return the
* number of bytes needed to encode this length if "p" is NULL. */
} else {
p[0] = ZIP_BIGLEN;
memcpy(p+1,&len,sizeof(len));
+ memrev32ifbe(p+1);
return 1+sizeof(len);
}
}
}
-/* Return the difference in number of bytes needed to store the new length
- * "len" on the entry pointed to by "p". */
+/* Store 'len' as the previous-entry length at "p", always in the large form:
+ * the ZIP_BIGLEN marker byte followed by the bytes of 'len' (passed through
+ * memrev32ifbe). __ziplistCascadeUpdate relies on this to keep an existing
+ * large field from shrinking. Nothing is written when "p" is NULL. */
+static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) {
+    if (p != NULL) {
+        unsigned char *field = p + 1;
+
+        *p = ZIP_BIGLEN;
+        memcpy(field, &len, sizeof(len));
+        memrev32ifbe(field);
+    }
+}
+
+/* Decode the number of bytes required to store the length of the previous
+ * element, from the perspective of the entry pointed to by 'ptr'.
+ *
+ * 'prevlensize' is set to 1 when the first byte is below ZIP_BIGLEN and to 5
+ * otherwise. The macro expands to one statement without a trailing
+ * semicolon, so it must be invoked as 'ZIP_DECODE_PREVLENSIZE(...);'. */
+#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \
+    if ((ptr)[0] < ZIP_BIGLEN) { \
+        (prevlensize) = 1; \
+    } else { \
+        (prevlensize) = 5; \
+    } \
+} while(0)
+
+/* Decode the length of the previous element, from the perspective of the entry
+ * pointed to by 'ptr'.
+ *
+ * 'prevlensize' receives the size of the field (1 or 5 bytes) and 'prevlen'
+ * the decoded length. In the 5 byte form the 4 bytes after the marker are
+ * copied straight into 'prevlen', so the assert checks the size of 'prevlen'
+ * itself (the destination of the copy). The macro expands to one statement
+ * without a trailing semicolon. */
+#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \
+    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \
+    if ((prevlensize) == 1) { \
+        (prevlen) = (ptr)[0]; \
+    } else if ((prevlensize) == 5) { \
+        assert(sizeof((prevlen)) == 4); \
+        memcpy(&(prevlen), ((char*)(ptr)) + 1, 4); \
+        memrev32ifbe(&(prevlen)); \
+    } \
+} while(0)
+
+/* Return the difference in number of bytes needed to store the length of the
+ * previous element 'len', in the entry pointed to by 'p'.
+ *
+ * The result is the size zipPrevEncodeLength() reports for 'len' minus the
+ * size of the prevlen field currently present at 'p': positive when the
+ * field would have to grow, negative when it is larger than needed, and
+ * zero when the sizes match. */
static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
unsigned int prevlensize;
- zipPrevDecodeLength(p,&prevlensize);
- return zipPrevEncodeLength(NULL,len)-prevlensize;
+ ZIP_DECODE_PREVLENSIZE(p, prevlensize);
+ return zipPrevEncodeLength(NULL, len) - prevlensize;
+}
+
+/* Size in bytes of the whole entry at 'p': the prevlen field, the
+ * encoding/length field and the payload added together. */
+static unsigned int zipRawEntryLength(unsigned char *p) {
+    unsigned int prevbytes;
+    unsigned int enc, hdrbytes, payload;
+
+    ZIP_DECODE_PREVLENSIZE(p, prevbytes);
+    ZIP_DECODE_LENGTH(p + prevbytes, enc, hdrbytes, payload);
+    return prevbytes + hdrbytes + payload;
}
/* Check if string pointed to by 'entry' can be encoded as an integer.
- * Stores the integer value in 'v' and its encoding in 'encoding'.
- * Warning: this function requires a NULL-terminated string! */
-static int zipTryEncoding(unsigned char *entry, long long *v, unsigned char *encoding) {
+ * Stores the integer value in 'v' and its encoding in 'encoding'. */
+static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
long long value;
- char *eptr;
- if (entry[0] == '-' || (entry[0] >= '0' && entry[0] <= '9')) {
- value = strtoll((char*)entry,&eptr,10);
- if (eptr[0] != '\0') return 0;
- if (value >= INT16_MIN && value <= INT16_MAX) {
- *encoding = ZIP_ENC_INT16;
+ if (entrylen >= 32 || entrylen == 0) return 0;
+ if (string2ll((char*)entry,entrylen,&value)) {
+ /* Great, the string can be encoded. Check what's the smallest
+ * of our encoding types that can hold this value. */
+ if (value >= 0 && value <= 12) {
+ *encoding = ZIP_INT_IMM_MIN+value;
+ } else if (value >= INT8_MIN && value <= INT8_MAX) {
+ *encoding = ZIP_INT_8B;
+ } else if (value >= INT16_MIN && value <= INT16_MAX) {
+ *encoding = ZIP_INT_16B;
+ } else if (value >= INT24_MIN && value <= INT24_MAX) {
+ *encoding = ZIP_INT_24B;
} else if (value >= INT32_MIN && value <= INT32_MAX) {
- *encoding = ZIP_ENC_INT32;
+ *encoding = ZIP_INT_32B;
} else {
- *encoding = ZIP_ENC_INT64;
+ *encoding = ZIP_INT_64B;
}
*v = value;
return 1;
int16_t i16;
int32_t i32;
int64_t i64;
- if (encoding == ZIP_ENC_INT16) {
+ if (encoding == ZIP_INT_8B) {
+ ((char*)p)[0] = (char)value;
+ } else if (encoding == ZIP_INT_16B) {
i16 = value;
memcpy(p,&i16,sizeof(i16));
- } else if (encoding == ZIP_ENC_INT32) {
+ memrev16ifbe(p);
+ } else if (encoding == ZIP_INT_24B) {
+ i32 = value<<8;
+ memrev32ifbe(&i32);
+ memcpy(p,((unsigned char*)&i32)+1,sizeof(i32)-sizeof(int8_t));
+ } else if (encoding == ZIP_INT_32B) {
i32 = value;
memcpy(p,&i32,sizeof(i32));
- } else if (encoding == ZIP_ENC_INT64) {
+ memrev32ifbe(p);
+ } else if (encoding == ZIP_INT_64B) {
i64 = value;
memcpy(p,&i64,sizeof(i64));
+ memrev64ifbe(p);
+ } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {
+ /* Nothing to do, the value is stored in the encoding itself. */
} else {
assert(NULL);
}
static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
int16_t i16;
int32_t i32;
- int64_t i64, ret;
- if (encoding == ZIP_ENC_INT16) {
+ int64_t i64, ret = 0;
+ if (encoding == ZIP_INT_8B) {
+ ret = ((char*)p)[0];
+ } else if (encoding == ZIP_INT_16B) {
memcpy(&i16,p,sizeof(i16));
+ memrev16ifbe(&i16);
ret = i16;
- } else if (encoding == ZIP_ENC_INT32) {
+ } else if (encoding == ZIP_INT_32B) {
memcpy(&i32,p,sizeof(i32));
+ memrev32ifbe(&i32);
ret = i32;
- } else if (encoding == ZIP_ENC_INT64) {
+ } else if (encoding == ZIP_INT_24B) {
+ i32 = 0;
+ memcpy(((unsigned char*)&i32)+1,p,sizeof(i32)-sizeof(int8_t));
+ memrev32ifbe(&i32);
+ ret = i32>>8;
+ } else if (encoding == ZIP_INT_64B) {
memcpy(&i64,p,sizeof(i64));
+ memrev64ifbe(&i64);
ret = i64;
+ } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {
+ ret = (encoding & ZIP_INT_IMM_MASK)-1;
} else {
assert(NULL);
}
/* Return a struct with all information about an entry. */
static zlentry zipEntry(unsigned char *p) {
zlentry e;
- e.prevrawlen = zipPrevDecodeLength(p,&e.prevrawlensize);
- e.len = zipDecodeLength(p+e.prevrawlensize,&e.lensize);
- e.headersize = e.prevrawlensize+e.lensize;
- e.encoding = ZIP_ENCODING(p+e.prevrawlensize);
+
+ ZIP_DECODE_PREVLEN(p, e.prevrawlensize, e.prevrawlen); /* size and value of the prevlen field */
+ ZIP_DECODE_LENGTH(p + e.prevrawlensize, e.encoding, e.lensize, e.len); /* encoding, its field size, payload length */
+ e.headersize = e.prevrawlensize + e.lensize; /* bytes in front of the payload */
e.p = p;
return e;
}
-/* Return the total number of bytes used by the entry at "p". */
-static unsigned int zipRawEntryLength(unsigned char *p) {
- zlentry e = zipEntry(p);
- return e.headersize + e.len;
-}
-
/* Create a new empty ziplist. */
unsigned char *ziplistNew(void) {
unsigned int bytes = ZIPLIST_HEADER_SIZE+1;
unsigned char *zl = zmalloc(bytes);
- ZIPLIST_BYTES(zl) = bytes;
- ZIPLIST_TAIL_OFFSET(zl) = ZIPLIST_HEADER_SIZE;
+ ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
+ ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
ZIPLIST_LENGTH(zl) = 0;
zl[bytes-1] = ZIP_END;
return zl;
/* Resize the ziplist. */
static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
zl = zrealloc(zl,len);
- ZIPLIST_BYTES(zl) = len;
+ ZIPLIST_BYTES(zl) = intrev32ifbe(len);
zl[len-1] = ZIP_END;
return zl;
}
+/* When an entry is inserted, we need to set the prevlen field of the next
+ * entry to equal the length of the inserted entry. It can occur that this
+ * length cannot be encoded in 1 byte and the next entry needs to grow
+ * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
+ * because this only happens when an entry is already being inserted (which
+ * causes a realloc and memmove). However, encoding the prevlen may require
+ * that this entry is grown as well. This effect may cascade throughout
+ * the ziplist when there are consecutive entries with a size close to
+ * ZIP_BIGLEN, so we need to check that the prevlen can be encoded in every
+ * consecutive entry.
+ *
+ * Note that this effect can also happen in reverse, where the bytes required
+ * to encode the prevlen field can shrink. This effect is deliberately ignored,
+ * because it can cause a "flapping" effect where a chain of prevlen fields is
+ * first grown and then shrunk again after consecutive inserts. Rather, the
+ * field is allowed to stay larger than necessary, because a large prevlen
+ * field implies the ziplist is holding large entries anyway.
+ *
+ * The pointer "p" points to the first entry that does NOT need to be
+ * updated, i.e. consecutive fields MAY need an update.
+ *
+ * Returns the ziplist pointer, which may differ from "zl" because growing a
+ * prevlen field goes through ziplistResize. */
+static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
+ size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
+ size_t offset, noffset, extra;
+ unsigned char *np;
+ zlentry cur, next;
+
+ while (p[0] != ZIP_END) {
+ cur = zipEntry(p);
+ rawlen = cur.headersize + cur.len; /* total size of the current entry */
+ rawlensize = zipPrevEncodeLength(NULL,rawlen); /* bytes needed to store that size as a prevlen */
+
+ /* Abort if there is no next entry. */
+ if (p[rawlen] == ZIP_END) break;
+ next = zipEntry(p+rawlen);
+
+ /* Abort when "prevlen" has not changed. */
+ if (next.prevrawlen == rawlen) break;
+
+ if (next.prevrawlensize < rawlensize) {
+ /* The "prevlen" field of "next" needs more bytes to hold
+ * the raw length of "cur". */
+ offset = p-zl; /* remember the position as an offset across the resize */
+ extra = rawlensize-next.prevrawlensize;
+ zl = ziplistResize(zl,curlen+extra);
+ p = zl+offset; /* re-derive "p" from the pointer ziplistResize returned */
+
+ /* Current pointer and offset for next element. */
+ np = p+rawlen;
+ noffset = np-zl;
+
+ /* Update tail offset when next element is not the tail element. */
+ if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
+ ZIPLIST_TAIL_OFFSET(zl) =
+ intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
+ }
+
+ /* Move the tail to the back. The size excludes the old prevlen
+ * field of "next" and the final byte of the old buffer. */
+ memmove(np+rawlensize,
+ np+next.prevrawlensize,
+ curlen-noffset-next.prevrawlensize-1);
+ zipPrevEncodeLength(np,rawlen);
+
+ /* Advance the cursor */
+ p += rawlen;
+ curlen += extra; /* keep the local size in step with the resized buffer */
+ } else {
+ if (next.prevrawlensize > rawlensize) {
+ /* This would result in shrinking, which we want to avoid.
+ * So, set "rawlen" in the available bytes. */
+ zipPrevEncodeLengthForceLarge(p+rawlen,rawlen);
+ } else {
+ zipPrevEncodeLength(p+rawlen,rawlen);
+ }
+
+ /* Stop here, as the raw length of "next" has not changed. */
+ break;
+ }
+ }
+ return zl;
+}
+
/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */
static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
unsigned int i, totlen, deleted = 0;
+ size_t offset;
int nextdiff = 0;
- zlentry first = zipEntry(p);
+ zlentry first, tail;
+
+ first = zipEntry(p);
for (i = 0; p[0] != ZIP_END && i < num; i++) {
p += zipRawEntryLength(p);
deleted++;
zipPrevEncodeLength(p-nextdiff,first.prevrawlen);
/* Update offset for tail */
- ZIPLIST_TAIL_OFFSET(zl) -= totlen+nextdiff;
+ ZIPLIST_TAIL_OFFSET(zl) =
+ intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen);
+
+ /* When the tail contains more than one entry, we need to take
+ * "nextdiff" in account as well. Otherwise, a change in the
+ * size of prevlen doesn't have an effect on the *tail* offset. */
+ tail = zipEntry(p);
+ if (p[tail.headersize+tail.len] != ZIP_END) {
+ ZIPLIST_TAIL_OFFSET(zl) =
+ intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
+ }
/* Move tail to the front of the ziplist */
- memmove(first.p,p-nextdiff,ZIPLIST_BYTES(zl)-(p-zl)-1+nextdiff);
+ memmove(first.p,p-nextdiff,
+ intrev32ifbe(ZIPLIST_BYTES(zl))-(p-zl)-1+nextdiff);
} else {
/* The entire tail was deleted. No need to move memory. */
- ZIPLIST_TAIL_OFFSET(zl) = (first.p-zl)-first.prevrawlen;
+ ZIPLIST_TAIL_OFFSET(zl) =
+ intrev32ifbe((first.p-zl)-first.prevrawlen);
}
/* Resize and update length */
- zl = ziplistResize(zl, ZIPLIST_BYTES(zl)-totlen+nextdiff);
+ offset = first.p-zl;
+ zl = ziplistResize(zl, intrev32ifbe(ZIPLIST_BYTES(zl))-totlen+nextdiff);
ZIPLIST_INCR_LENGTH(zl,-deleted);
+ p = zl+offset;
+
+ /* When nextdiff != 0, the raw length of the next entry has changed, so
+ * we need to cascade the update throughout the ziplist */
+ if (nextdiff != 0)
+ zl = __ziplistCascadeUpdate(zl,p);
}
return zl;
}
/* Insert item at "p". */
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
- unsigned int curlen = ZIPLIST_BYTES(zl), reqlen, prevlen = 0;
- unsigned int offset, nextdiff = 0;
- unsigned char *tail;
- unsigned char encoding = ZIP_ENC_RAW;
- long long value;
- zlentry entry;
+ size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, prevlen = 0;
+ size_t offset;
+ int nextdiff = 0;
+ unsigned char encoding = 0;
+ long long value = 123456789; /* initialized to avoid warning. Using a value
+ that is easy to see if for some reason
+ we use it uninitialized. */
+ zlentry entry, tail;
/* Find out prevlen for the entry that is inserted. */
if (p[0] != ZIP_END) {
entry = zipEntry(p);
prevlen = entry.prevrawlen;
} else {
- tail = ZIPLIST_ENTRY_TAIL(zl);
- if (tail[0] != ZIP_END) {
- prevlen = zipRawEntryLength(tail);
+ unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
+ if (ptail[0] != ZIP_END) {
+ prevlen = zipRawEntryLength(ptail);
}
}
/* See if the entry can be encoded */
- if (zipTryEncoding(s,&value,&encoding)) {
- reqlen = zipEncodingSize(encoding);
+ if (zipTryEncoding(s,slen,&value,&encoding)) {
+ /* 'encoding' is set to the appropriate integer encoding */
+ reqlen = zipIntSize(encoding);
} else {
+ /* 'encoding' is untouched, however zipEncodeLength will use the
+ * string length to figure out how to encode it. */
reqlen = slen;
}
-
/* We need space for both the length of the previous entry and
* the length of the payload. */
reqlen += zipPrevEncodeLength(NULL,prevlen);
if (p[0] != ZIP_END) {
/* Subtract one because of the ZIP_END bytes */
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
+
/* Encode this entry's raw length in the next entry. */
zipPrevEncodeLength(p+reqlen,reqlen);
+
/* Update offset for tail */
- ZIPLIST_TAIL_OFFSET(zl) += reqlen+nextdiff;
+ ZIPLIST_TAIL_OFFSET(zl) =
+ intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
+
+ /* When the tail contains more than one entry, we need to take
+ * "nextdiff" in account as well. Otherwise, a change in the
+ * size of prevlen doesn't have an effect on the *tail* offset. */
+ tail = zipEntry(p+reqlen);
+ if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
+ ZIPLIST_TAIL_OFFSET(zl) =
+ intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
+ }
} else {
/* This element will be the new tail. */
- ZIPLIST_TAIL_OFFSET(zl) = p-zl;
+ ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
+ }
+
+ /* When nextdiff != 0, the raw length of the next entry has changed, so
+ * we need to cascade the update throughout the ziplist */
+ if (nextdiff != 0) {
+ offset = p-zl;
+ zl = __ziplistCascadeUpdate(zl,p+reqlen);
+ p = zl+offset;
}
/* Write the entry */
p += zipPrevEncodeLength(p,prevlen);
p += zipEncodeLength(p,encoding,slen);
- if (encoding != ZIP_ENC_RAW) {
- zipSaveInteger(p,value,encoding);
- } else {
+ if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
+ } else {
+ zipSaveInteger(p,value,encoding);
}
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
return (p[0] == ZIP_END || index > 0) ? NULL : p;
}
-/* Return pointer to next entry in ziplist. */
+/* Return pointer to next entry in ziplist.
+ *
+ * zl is the pointer to the ziplist
+ * p is the pointer to the current element
+ *
+ * The element after 'p' is returned, otherwise NULL if we are at the end. */
unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {
((void) zl);
* when the *next* element is ZIP_END (there is no next entry). */
if (p[0] == ZIP_END) {
return NULL;
- } else {
- p = p+zipRawEntryLength(p);
- return (p[0] == ZIP_END) ? NULL : p;
}
+
+ p += zipRawEntryLength(p);
+ if (p[0] == ZIP_END) {
+ return NULL;
+ }
+
+ return p;
}
/* Return pointer to previous entry in ziplist. */
return NULL;
} else {
entry = zipEntry(p);
+ assert(entry.prevrawlen > 0);
return p-entry.prevrawlen;
}
}
if (sstr) *sstr = NULL;
entry = zipEntry(p);
- if (entry.encoding == ZIP_ENC_RAW) {
+ if (ZIP_IS_STR(entry.encoding)) {
if (sstr) {
*slen = entry.len;
*sstr = p+entry.headersize;
* Also update *p in place, to be able to iterate over the
* ziplist, while deleting entries. */
unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p) {
- unsigned int offset = *p-zl;
+ size_t offset = *p-zl;
zl = __ziplistDelete(zl,*p,1);
/* Store pointer to current element in p, because ziplistDelete will
if (p[0] == ZIP_END) return 0;
entry = zipEntry(p);
- if (entry.encoding == ZIP_ENC_RAW) {
+ if (ZIP_IS_STR(entry.encoding)) {
/* Raw compare */
if (entry.len == slen) {
return memcmp(p+entry.headersize,sstr,slen) == 0;
}
} else {
/* Try to compare encoded values */
- if (zipTryEncoding(sstr,&sval,&sencoding)) {
+ if (zipTryEncoding(sstr,slen,&sval,&sencoding)) {
if (entry.encoding == sencoding) {
zval = zipLoadInteger(p+entry.headersize,entry.encoding);
return zval == sval;
return 0;
}
+/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries
+ * between every comparison. Returns NULL when the field could not be found.
+ *
+ * 'p' is the entry at which the search starts; 'vstr'/'vlen' describe the
+ * value to look for. String entries are compared byte by byte. For integer
+ * entries the search value is passed through zipTryEncoding() once, the
+ * result is cached in 'vencoding'/'vll', and an entry matches only when both
+ * its encoding and its numeric value are equal to the cached ones. */
+unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {
+ int skipcnt = 0;
+ unsigned char vencoding = 0;
+ long long vll = 0;
+
+ while (p[0] != ZIP_END) {
+ unsigned int prevlensize, encoding, lensize, len;
+ unsigned char *q;
+
+ ZIP_DECODE_PREVLENSIZE(p, prevlensize);
+ ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len);
+ q = p + prevlensize + lensize; /* start of this entry's payload */
+
+ if (skipcnt == 0) {
+ /* Compare current entry with specified entry */
+ if (ZIP_IS_STR(encoding)) {
+ if (len == vlen && memcmp(q, vstr, vlen) == 0) {
+ return p;
+ }
+ } else {
+ /* Find out if the specified entry can be encoded. A zero
+ * 'vencoding' means this has not been attempted yet. */
+ if (vencoding == 0) {
+ /* UCHAR_MAX when the entry CANNOT be encoded */
+ if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) {
+ vencoding = UCHAR_MAX;
+ }
+
+ /* Must be non-zero by now */
+ assert(vencoding);
+ }
+
+ /* Compare current entry with specified entry */
+ if (encoding == vencoding) {
+ long long ll = zipLoadInteger(q, encoding);
+ if (ll == vll) {
+ return p;
+ }
+ }
+ }
+
+ /* Reset skip count */
+ skipcnt = skip;
+ } else {
+ /* Skip entry */
+ skipcnt--;
+ }
+
+ /* Move to next entry */
+ p = q + len;
+ }
+
+ return NULL;
+}
+
/* Return length of ziplist. */
unsigned int ziplistLen(unsigned char *zl) {
unsigned int len = 0;
- if (ZIPLIST_LENGTH(zl) < UINT16_MAX) {
- len = ZIPLIST_LENGTH(zl);
+ if (intrev16ifbe(ZIPLIST_LENGTH(zl)) < UINT16_MAX) {
+ len = intrev16ifbe(ZIPLIST_LENGTH(zl));
} else {
unsigned char *p = zl+ZIPLIST_HEADER_SIZE;
while (*p != ZIP_END) {
}
/* Re-store length if small enough */
- if (len < UINT16_MAX) ZIPLIST_LENGTH(zl) = len;
+ if (len < UINT16_MAX) ZIPLIST_LENGTH(zl) = intrev16ifbe(len);
}
return len;
}
-/* Return size in bytes of ziplist. */
-unsigned int ziplistSize(unsigned char *zl) {
- return ZIPLIST_BYTES(zl);
+/* Return ziplist blob size in bytes, as recorded in the header field read
+ * by ZIPLIST_BYTES (passed through intrev32ifbe before being returned). */
+size_t ziplistBlobLen(unsigned char *zl) {
+ return intrev32ifbe(ZIPLIST_BYTES(zl));
}
void ziplistRepr(unsigned char *zl) {
unsigned char *p;
+ int index = 0;
zlentry entry;
- printf("{total bytes %d} {length %u}\n",ZIPLIST_BYTES(zl), ZIPLIST_LENGTH(zl));
+ printf(
+ "{total bytes %d} "
+ "{length %u}\n"
+ "{tail offset %u}\n",
+ intrev32ifbe(ZIPLIST_BYTES(zl)),
+ intrev16ifbe(ZIPLIST_LENGTH(zl)),
+ intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)));
p = ZIPLIST_ENTRY_HEAD(zl);
while(*p != ZIP_END) {
entry = zipEntry(p);
- printf("{offset %ld, header %u, payload %u} ",p-zl,entry.headersize,entry.len);
+ printf(
+ "{"
+ "addr 0x%08lx, "
+ "index %2d, "
+ "offset %5ld, "
+ "rl: %5u, "
+ "hs %2u, "
+ "pl: %5u, "
+ "pls: %2u, "
+ "payload %5u"
+ "} ",
+ (long unsigned)p,
+ index,
+ (unsigned long) (p-zl),
+ entry.headersize+entry.len,
+ entry.headersize,
+ entry.prevrawlen,
+ entry.prevrawlensize,
+ entry.len);
p += entry.headersize;
- if (entry.encoding == ZIP_ENC_RAW) {
- fwrite(p,entry.len,1,stdout);
+ if (ZIP_IS_STR(entry.encoding)) {
+ if (entry.len > 40) {
+ if (fwrite(p,40,1,stdout) == 0) perror("fwrite");
+ printf("...");
+ } else {
+ if (entry.len &&
+ fwrite(p,entry.len,1,stdout) == 0) perror("fwrite");
+ }
} else {
printf("%lld", (long long) zipLoadInteger(p,entry.encoding));
}
printf("\n");
p += entry.len;
+ index++;
}
printf("{end}\n\n");
}
#ifdef ZIPLIST_TEST_MAIN
#include <sys/time.h>
+#include "adlist.h"
+#include "sds.h"
+
+#define debug(f, ...) { if (DEBUG) printf(f, __VA_ARGS__); }
unsigned char *createList() {
unsigned char *zl = ziplistNew();
zl = ziplistDeleteRange(zl,0,1);
}
printf("List size: %8d, bytes: %8d, %dx push+pop (%s): %6lld usec\n",
- i,ZIPLIST_BYTES(zl),num,posstr[pos],usec()-start);
+ i,intrev32ifbe(ZIPLIST_BYTES(zl)),num,posstr[pos],usec()-start);
zfree(zl);
}
}
printf("Pop tail: ");
if (vstr)
+{ /* Braces keep the 'else' below paired with 'if (vstr)'; without them it binds to the nested fwrite check. */
- fwrite(vstr,vlen,1,stdout);
+    if (vlen && fwrite(vstr,vlen,1,stdout) == 0) perror("fwrite");
+}
else
printf("%lld", vlong);
}
}
+/* Fill 'target' with a random byte string and return its length.
+ *
+ * The length is drawn from [min, max]. The bytes come from one of three
+ * randomly chosen ranges: any byte value (0..255), 48..122, or 48..52.
+ * No terminating NUL is written, so 'target' must have room for at least
+ * 'max' bytes. Assumes min <= max -- TODO confirm against callers. */
+int randstring(char *target, unsigned int min, unsigned int max) {
+    int p = 0; /* write index; must start at 0 */
+    int len = min+rand()%(max-min+1);
+    int minval = 0, maxval = 0; /* defined even if the assert is compiled out */
+    switch(rand() % 3) {
+    case 0:
+        minval = 0;
+        maxval = 255;
+        break;
+    case 1:
+        minval = 48;
+        maxval = 122;
+        break;
+    case 2:
+        minval = 48;
+        maxval = 52;
+        break;
+    default:
+        assert(NULL);
+    }
+
+    while(p < len)
+        target[p++] = minval+rand()%(maxval-minval+1);
+    return len;
+}
+
int main(int argc, char **argv) {
unsigned char *zl, *p;
unsigned char *entry;
unsigned int elen;
long long value;
+ /* If an argument is given, use it as the random seed. */
+ if (argc == 2)
+ srand(atoi(argv[1]));
+
zl = createIntList();
ziplistRepr(zl);
return 1;
}
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
printf("\n");
} else {
printf("%lld\n", value);
return 1;
}
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
printf("\n");
} else {
printf("%lld\n", value);
return 1;
}
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
printf("\n");
} else {
printf("%lld\n", value);
while (ziplistGet(p, &entry, &elen, &value)) {
printf("Entry: ");
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
} else {
printf("%lld", value);
}
while (ziplistGet(p, &entry, &elen, &value)) {
printf("Entry: ");
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
} else {
printf("%lld", value);
}
while (ziplistGet(p, &entry, &elen, &value)) {
printf("Entry: ");
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
} else {
printf("%lld", value);
}
while (ziplistGet(p, &entry, &elen, &value)) {
printf("Entry: ");
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
} else {
printf("%lld", value);
}
while (ziplistGet(p, &entry, &elen, &value)) {
printf("Entry: ");
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0) perror("fwrite");
} else {
printf("%lld", value);
}
} else {
printf("Entry: ");
if (entry) {
- fwrite(entry,elen,1,stdout);
+ if (elen && fwrite(entry,elen,1,stdout) == 0)
+ perror("fwrite");
} else {
printf("%lld",value);
}
ziplistRepr(zl);
}
+    printf("Regression test for >255 byte strings:\n");
+    {
+        /* Zero-initialize both buffers so that the 256 filler bytes are
+         * followed by a NUL terminator; strlen() below depends on it. */
+        char v1[257] = {0}, v2[257] = {0};
+        memset(v1,'x',256);
+        memset(v2,'y',256);
+        zl = ziplistNew();
+        zl = ziplistPush(zl,(unsigned char*)v1,strlen(v1),ZIPLIST_TAIL);
+        zl = ziplistPush(zl,(unsigned char*)v2,strlen(v2),ZIPLIST_TAIL);
+
+        /* Read both entries back by index and check that their length and
+         * contents match what was pushed. The length check matters because
+         * strncmp() with a zero length would succeed vacuously. */
+        p = ziplistIndex(zl,0);
+        assert(ziplistGet(p,&entry,&elen,&value));
+        assert(elen == 256 && strncmp(v1,(char*)entry,elen) == 0);
+        p = ziplistIndex(zl,1);
+        assert(ziplistGet(p,&entry,&elen,&value));
+        assert(elen == 256 && strncmp(v2,(char*)entry,elen) == 0);
+        printf("SUCCESS\n\n");
+    }
+
printf("Create long list and check indices:\n");
{
zl = ziplistNew();
printf("ERROR: \"1025\"\n");
return 1;
}
- printf("SUCCESS\n");
+ printf("SUCCESS\n\n");
+ }
+
+ /* Stress test: for each of 20000 rounds, build a ziplist and a reference
+ * linked list from the same sequence of random pushes, then walk both by
+ * index and check that every ziplist entry matches the reference value. */
+ printf("Stress with random payloads of different encoding:\n");
+ {
+ int i,j,len,where;
+ /* Note: this declaration shadows the 'p' declared at the top of main(). */
+ unsigned char *p;
+ char buf[1024];
+ int buflen;
+ list *ref;
+ listNode *refnode;
+
+ /* Output parameters filled in by ziplistGet(): string pointer, string
+ * length, and integer value. */
+ unsigned char *sstr;
+ unsigned int slen;
+ long long sval;
+
+ for (i = 0; i < 20000; i++) {
+ zl = ziplistNew();
+ ref = listCreate();
+ /* The reference list holds sds strings and frees them on release. */
+ listSetFreeMethod(ref,sdsfree);
+ /* Between 0 and 255 elements per round. */
+ len = rand() % 256;
+
+ /* Populate both lists with identical payloads, pushed on the same
+ * randomly chosen side. */
+ for (j = 0; j < len; j++) {
+ where = (rand() & 1) ? ZIPLIST_HEAD : ZIPLIST_TAIL;
+ if (rand() % 2) {
+ /* Random bytes, 1 to sizeof(buf)-1 of them (not NUL-terminated). */
+ buflen = randstring(buf,1,sizeof(buf)-1);
+ } else {
+ /* Decimal text of a random number, scaled down, unscaled, or
+ * scaled up by 20 bits -- presumably to exercise integer
+ * encodings of different widths (TODO confirm). */
+ switch(rand() % 3) {
+ case 0:
+ buflen = sprintf(buf,"%lld",(0LL + rand()) >> 20);
+ break;
+ case 1:
+ buflen = sprintf(buf,"%lld",(0LL + rand()));
+ break;
+ case 2:
+ buflen = sprintf(buf,"%lld",(0LL + rand()) << 20);
+ break;
+ default:
+ assert(NULL);
+ }
+ }
+
+ /* Add to ziplist */
+ zl = ziplistPush(zl, (unsigned char*)buf, buflen, where);
+
+ /* Add a copy of the same bytes to the reference list, on the same side */
+ if (where == ZIPLIST_HEAD) {
+ listAddNodeHead(ref,sdsnewlen(buf, buflen));
+ } else if (where == ZIPLIST_TAIL) {
+ listAddNodeTail(ref,sdsnewlen(buf, buflen));
+ } else {
+ assert(NULL);
+ }
+ }
+
+ /* Both containers must report the same number of elements. */
+ assert(listLength(ref) == ziplistLen(zl));
+ for (j = 0; j < len; j++) {
+ /* Naive way to get elements, but similar to the stresser
+ * executed from the Tcl test suite. */
+ p = ziplistIndex(zl,j);
+ refnode = listIndex(ref,j);
+
+ /* NOTE(review): the ziplistGet() call lives inside assert(), so it
+ * is not executed when the file is built with NDEBUG. */
+ assert(ziplistGet(p,&sstr,&slen,&sval));
+ if (sstr == NULL) {
+ /* No string pointer returned: render the integer value as text. */
+ buflen = sprintf(buf,"%lld",sval);
+ } else {
+ buflen = slen;
+ memcpy(buf,sstr,buflen);
+ buf[buflen] = '\0';
+ }
+ /* NOTE(review): only the first 'buflen' bytes are compared; the length
+ * of the reference value itself is not checked here. */
+ assert(memcmp(buf,listNodeValue(refnode),buflen) == 0);
+ }
+ zfree(zl);
+ listRelease(ref);
+ }
+ printf("SUCCESS\n\n");
}
printf("Stress with variable ziplist size:\n");