2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "1.3.10" 
  45 #endif /* HAVE_BACKTRACE */ 
  53 #include <arpa/inet.h> 
  57 #include <sys/resource.h> 
  64 #include "solarisfixes.h" 
  68 #include "ae.h"     /* Event driven programming library */ 
  69 #include "sds.h"    /* Dynamic safe strings */ 
  70 #include "anet.h"   /* Networking the easy way */ 
  71 #include "dict.h"   /* Hash tables */ 
  72 #include "adlist.h" /* Linked lists */ 
  73 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  74 #include "lzf.h"    /* LZF compression library */ 
  75 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  82 /* Static server configuration */ 
  83 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  84 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  85 #define REDIS_IOBUF_LEN         1024 
  86 #define REDIS_LOADBUF_LEN       1024 
  87 #define REDIS_STATIC_ARGS       8 
  88 #define REDIS_DEFAULT_DBNUM     16 
  89 #define REDIS_CONFIGLINE_MAX    1024 
  90 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  91 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  92 #define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* lookup 10 expires per loop */ 
  93 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  94 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
  96 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
  97 #define REDIS_WRITEV_THRESHOLD      3 
  98 /* Max number of iovecs used for each writev call */ 
  99 #define REDIS_WRITEV_IOVEC_COUNT    256 
 101 /* Hash table parameters */ 
 102 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 105 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 106 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 107 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 108    this flag will return an error when the 'maxmemory' option is set in the 
 109    config file and the server is using more than maxmemory bytes of memory. 
 110    In short, these commands are denied in low-memory conditions. */ 
 111 #define REDIS_CMD_DENYOOM       4 
 112 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ 
 115 #define REDIS_STRING 0 
 121 /* Objects encoding. Some kinds of objects, like Strings and Hashes, can be 
 122  * internally represented in multiple ways. The 'encoding' field of the object 
 123  * is set to one of these values for this object. */ 
 124 #define REDIS_ENCODING_RAW 0    /* Raw representation */ 
 125 #define REDIS_ENCODING_INT 1    /* Encoded as integer */ 
 126 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */ 
 127 #define REDIS_ENCODING_HT 3     /* Encoded as an hash table */ 
 129 static char* strencoding
[] = { 
 130     "raw", "int", "zipmap", "hashtable" 
 133 /* Object types only used for dumping to disk */ 
 134 #define REDIS_EXPIRETIME 253 
 135 #define REDIS_SELECTDB 254 
 136 #define REDIS_EOF 255 
 138 /* Defines related to the dump file format. Storing 32 bit lengths for short 
 139  * keys requires a lot of space, so we check the most significant 2 bits of 
 140  * the first byte to interpret the length: 
 142  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 143  * 01|000000 00000000 => if it's 01, the len is 14 bits, 6 bits + 8 bits of next byte 
 144  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 145  * 11|000000 this means: specially encoded object will follow. The six bits 
 146  *           number specify the kind of object that follows. 
 147  *           See the REDIS_RDB_ENC_* defines. 
 149  * Lengths up to 63 are stored using a single byte, most DB keys, and many 
 150  * values, will fit inside. */ 
 151 #define REDIS_RDB_6BITLEN 0 
 152 #define REDIS_RDB_14BITLEN 1 
 153 #define REDIS_RDB_32BITLEN 2 
 154 #define REDIS_RDB_ENCVAL 3 
 155 #define REDIS_RDB_LENERR UINT_MAX 
 157 /* When a length of a string object stored on disk has the first two bits 
 158  * set, the remaining six bits specify a special encoding for the object 
 159  * according to the following defines: */ 
 160 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 161 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 162 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 163 #define REDIS_RDB_ENC_LZF 3         /* string compressed with LZF */ 
 165 /* Virtual memory object->where field. */ 
 166 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 167 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 168 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 169 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 171 /* Virtual memory static configuration stuff. 
 172  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 173 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 174 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 175 #define REDIS_VM_MAX_THREADS 32 
 176 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 177 /* The following is the *percentage* of completed I/O jobs to process when the 
 178  * handler is called. While Virtual Memory I/O operations are performed by 
 179  * threads, these operations must be processed by the main thread when completed 
 180  * in order to take effect. */ 
 181 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 184 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 185 #define REDIS_MASTER 2      /* This client is a master server */ 
 186 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 187 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 188 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 189 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 191 /* Slave replication state - slave side */ 
 192 #define REDIS_REPL_NONE 0   /* No active replication */ 
 193 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 194 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 196 /* Slave replication state - from the point of view of master 
 197  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 198  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 199  * to start the next background saving in order to send updates to it. */ 
 200 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 201 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 202 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 203 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 205 /* List related stuff */ 
 209 /* Sort operations */ 
 210 #define REDIS_SORT_GET 0 
 211 #define REDIS_SORT_ASC 1 
 212 #define REDIS_SORT_DESC 2 
 213 #define REDIS_SORTKEY_MAX 1024 
 216 #define REDIS_DEBUG 0 
 217 #define REDIS_VERBOSE 1 
 218 #define REDIS_NOTICE 2 
 219 #define REDIS_WARNING 3 
 221 /* Anti-warning macro... */ 
 222 #define REDIS_NOTUSED(V) ((void) V) 
 224 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 225 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 227 /* Append only defines */ 
 228 #define APPENDFSYNC_NO 0 
 229 #define APPENDFSYNC_ALWAYS 1 
 230 #define APPENDFSYNC_EVERYSEC 2 
 232 /* Hashes related defaults */ 
 233 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 234 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 /* Assertion helpers. We can print the stacktrace, so assertions are routed
 * through our own reporting functions instead of <assert.h>.
 *
 * redisAssert(_e): when expression _e evaluates to false, calls
 *   _redisAssert() with the stringified expression, the current file and
 *   the current line, then terminates the process with _exit(1). When _e is
 *   true the macro evaluates to (void)0.
 * redisPanic(_e): unconditionally calls _redisPanic() with the stringified
 *   argument, the current file and the current line, then calls _exit(1).
 *
 * Both expansions are fully parenthesized so that each macro is a single
 * expression wherever it appears (function argument, operand, ...) rather
 * than leaking a top-level comma operator into the surrounding code. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))

/* Reporting back-ends used by the macros above; defined later in the file.
 * estr/msg: stringified expression or message, file/line: call site. */
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
 242 /*================================= Data types ============================== */ 
 244 /* A redis object, that is a type able to hold a string / list / set */ 
 246 /* The VM object structure: where the object is stored on disk, how many
      * pages it uses there, and when it was last accessed. */ 
 247 struct redisObjectVM 
{ 
 248     off_t page
;         /* the page at which the object is stored on disk */ 
 249     off_t usedpages
;    /* number of pages used on disk */ 
 250     time_t atime
;       /* Last access time */ 
 253 /* The actual Redis Object */ 
 254 typedef struct redisObject 
{ 
 257     unsigned char encoding
;  /* One of the REDIS_ENCODING_* defines */ 
 258     unsigned char storage
;  /* If this object is a key, where is the value? 
 259                              * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */ 
 260     unsigned char vtype
; /* If this object is a key, and value is swapped out, 
 261                           * this is the type of the swapped out object. */ 
 263     /* VM fields, these are only allocated if VM is active, otherwise the 
 264      * object allocation function will just allocate 
 265      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 266      * Redis without VM active will not have any overhead. */ 
 267     struct redisObjectVM vm
; 
 270 /* Macro used to initialize a Redis object allocated on the stack. 
 271  * Note that this macro is kept near the structure definition to make sure 
 272  * we'll update it when the structure is changed, to avoid bugs like 
 273  * bug #85 introduced exactly in this way. */ 
 274 #define initStaticStringObject(_var,_ptr) do { \ 
 276     _var.type = REDIS_STRING; \ 
 277     _var.encoding = REDIS_ENCODING_RAW; \ 
 279     if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \ 
 282 typedef struct redisDb 
{ 
 283     dict 
*dict
;                 /* The keyspace for this DB */ 
 284     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 285     dict 
*blockingkeys
;         /* Keys with clients waiting for data (BLPOP) */ 
 286     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 290 /* Client MULTI/EXEC state */ 
 291 typedef struct multiCmd 
{ 
 294     struct redisCommand 
*cmd
; 
 297 typedef struct multiState 
{ 
 298     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 299     int count
;              /* Total number of MULTI commands */ 
 302 /* With multiplexing we need to keep per-client state. 
 303  * Clients are kept in a linked list. */ 
 304 typedef struct redisClient 
{ 
 309     robj 
**argv
, **mbargv
; 
 311     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 312     int multibulk
;          /* multi bulk command format active */ 
 315     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 316     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 317     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 318     int authenticated
;      /* when requirepass is non-NULL */ 
 319     int replstate
;          /* replication state if this is a slave */ 
 320     int repldbfd
;           /* replication DB file descriptor */ 
 321     long repldboff
;         /* replication DB file offset */ 
 322     off_t repldbsize
;       /* replication DB file size */ 
 323     multiState mstate
;      /* MULTI/EXEC state */ 
 324     robj 
**blockingkeys
;    /* The key we are waiting to terminate a blocking 
 325                              * operation such as BLPOP. Otherwise NULL. */ 
 326     int blockingkeysnum
;    /* Number of blocking keys */ 
 327     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 328                              * is >= blockingto then the operation timed out. */ 
 329     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 330                              * swap file in order to continue. */ 
 331     dict 
*pubsub_channels
;  /* channels a client is interested in (SUBSCRIBE) */ 
 332     list 
*pubsub_patterns
;  /* patterns a client is interested in (SUBSCRIBE) */ 
 340 /* Global server state structure */ 
 345     long long dirty
;            /* changes to DB from the last save */ 
 347     list 
*slaves
, *monitors
; 
 348     char neterr
[ANET_ERR_LEN
]; 
 350     int cronloops
;              /* number of times the cron function run */ 
 351     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 352     time_t lastsave
;            /* Unix time of last save succeeede */ 
 353     /* Fields used only for stats */ 
 354     time_t stat_starttime
;         /* server start time */ 
 355     long long stat_numcommands
;    /* number of processed commands */ 
 356     long long stat_numconnections
; /* number of connections received */ 
 357     long long stat_expiredkeys
;   /* number of expired keys */ 
 370     pid_t bgsavechildpid
; 
 371     pid_t bgrewritechildpid
; 
 372     sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */ 
 373     sds aofbuf
;       /* AOF buffer, written before entering the event loop */ 
 374     struct saveparam 
*saveparams
; 
 379     char *appendfilename
; 
 383     /* Replication related */ 
 388     redisClient 
*master
;    /* client that is master for this slave */ 
 390     unsigned int maxclients
; 
 391     unsigned long long maxmemory
; 
 392     unsigned int blpop_blocked_clients
; 
 393     unsigned int vm_blocked_clients
; 
 394     /* Sort parameters - qsort_r() is only available under BSD so we 
 395      * have to take this state global, in order to pass it to sortCompare() */ 
 399     /* Virtual memory configuration */ 
 404     unsigned long long vm_max_memory
; 
 406     size_t hash_max_zipmap_entries
; 
 407     size_t hash_max_zipmap_value
; 
 408     /* Virtual memory state */ 
 411     off_t vm_next_page
; /* Next probably empty page */ 
 412     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 413     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 414     time_t unixtime
;    /* Unix time sampled every second. */ 
 415     /* Virtual memory I/O threads stuff */ 
 416     /* An I/O thread processes an element taken from the io_newjobs queue and 
 417      * puts the result of the operation in the io_processed list. While the 
 418      * job is being processed, it's put on the io_processing queue. */ 
 419     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 420     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 421     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 422     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 423     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 424     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 425     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 426     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 427     int io_active_threads
; /* Number of running I/O threads */ 
 428     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 429     /* Our main thread is blocked on the event loop, looking for sockets ready 
 430      * to be read or written, so when a threaded I/O operation is ready to be 
 431      * processed by the main thread, the I/O thread will use a unix pipe to 
 432      * awake the main thread. The following are the two pipe FDs. */ 
 433     int io_ready_pipe_read
; 
 434     int io_ready_pipe_write
; 
 435     /* Virtual memory stats */ 
 436     unsigned long long vm_stats_used_pages
; 
 437     unsigned long long vm_stats_swapped_objects
; 
 438     unsigned long long vm_stats_swapouts
; 
 439     unsigned long long vm_stats_swapins
; 
 441     dict 
*pubsub_channels
; /* Map channels to list of subscribed clients */ 
 442     list 
*pubsub_patterns
; /* A list of pubsub_patterns */ 
 447 typedef struct pubsubPattern 
{ 
 452 typedef void redisCommandProc(redisClient 
*c
); 
 453 typedef void redisVmPreloadProc(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 /* Descriptor of a single command; cmdTable[] is an array of these. */
 454 struct redisCommand 
{ 
 456     redisCommandProc 
*proc
; /* Command implementation (a redisCommandProc, called with the client) */ 
 459     /* Use a function to determine which keys need to be loaded 
 460      * in the background prior to executing this command. Takes precedence 
 461      * over vm_firstkey and others, ignored when NULL */ 
 462     redisVmPreloadProc 
*vm_preload_proc
; 
 463     /* What keys should be loaded in background when calling this command? */ 
 464     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 465     int vm_lastkey
;  /* The last argument that's a key */ 
 466     int vm_keystep
;  /* The step between first and last key */ 
 469 struct redisFunctionSym 
{ 
 471     unsigned long pointer
; 
 474 typedef struct _redisSortObject 
{ 
 482 typedef struct _redisSortOperation 
{ 
 485 } redisSortOperation
; 
 487 /* ZSETs use a specialized version of Skiplists */ 
 489 typedef struct zskiplistNode 
{ 
 490     struct zskiplistNode 
**forward
; 
 491     struct zskiplistNode 
*backward
; 
 497 typedef struct zskiplist 
{ 
 498     struct zskiplistNode 
*header
, *tail
; 
 499     unsigned long length
; 
 503 typedef struct zset 
{ 
 508 /* Our shared "common" objects */ 
 510 #define REDIS_SHARED_INTEGERS 10000 
 511 struct sharedObjectsStruct 
{ 
 512     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 513     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 514     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 515     *outofrangeerr
, *plus
, 
 516     *select0
, *select1
, *select2
, *select3
, *select4
, 
 517     *select5
, *select6
, *select7
, *select8
, *select9
, 
 518     *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
, 
 519     *mbulk4
, *psubscribebulk
, *punsubscribebulk
, 
 520     *integers
[REDIS_SHARED_INTEGERS
]; 
 523 /* Global vars that are actually used as constants. The following double 
 524  * values are used for double on-disk serialization, and are initialized 
 525  * at runtime to avoid strange compiler optimizations. */ 
 527 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 529 /* VM threaded I/O request message */ 
 530 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 531 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 532 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 /* A single VM I/O job; the kind of job is given by 'type'. */
 533 typedef struct iojob 
{ 
 534     int type
;   /* Request type, REDIS_IOJOB_* */ 
 535     redisDb 
*db
;/* Redis database */ 
 536     robj 
*key
;  /* This I/O request is about swapping this key */ 
 537     robj 
*val
;  /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this 
 538                  * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */ 
 539     off_t page
; /* Swap page where to read/write the object */ 
 540     off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */ 
 541     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 542     pthread_t thread
; /* ID of the thread processing this entry */ 
 545 /*================================ Prototypes =============================== */ 
 547 static void freeStringObject(robj 
*o
); 
 548 static void freeListObject(robj 
*o
); 
 549 static void freeSetObject(robj 
*o
); 
 550 static void decrRefCount(void *o
); 
 551 static robj 
*createObject(int type
, void *ptr
); 
 552 static void freeClient(redisClient 
*c
); 
 553 static int rdbLoad(char *filename
); 
 554 static void addReply(redisClient 
*c
, robj 
*obj
); 
 555 static void addReplySds(redisClient 
*c
, sds s
); 
 556 static void incrRefCount(robj 
*o
); 
 557 static int rdbSaveBackground(char *filename
); 
 558 static robj 
*createStringObject(char *ptr
, size_t len
); 
 559 static robj 
*dupStringObject(robj 
*o
); 
 560 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
); 
 561 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
); 
 562 static void flushAppendOnlyFile(void); 
 563 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 564 static int syncWithMaster(void); 
 565 static robj 
*tryObjectEncoding(robj 
*o
); 
 566 static robj 
*getDecodedObject(robj 
*o
); 
 567 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 568 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 569 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 570 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
); 
 571 static int deleteKey(redisDb 
*db
, robj 
*key
); 
 572 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 573 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 574 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 575 static void freeMemoryIfNeeded(void); 
 576 static int processCommand(redisClient 
*c
); 
 577 static void setupSigSegvAction(void); 
 578 static void rdbRemoveTempFile(pid_t childpid
); 
 579 static void aofRemoveTempFile(pid_t childpid
); 
 580 static size_t stringObjectLen(robj 
*o
); 
 581 static void processInputBuffer(redisClient 
*c
); 
 582 static zskiplist 
*zslCreate(void); 
 583 static void zslFree(zskiplist 
*zsl
); 
 584 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 585 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 586 static void initClientMultiState(redisClient 
*c
); 
 587 static void freeClientMultiState(redisClient 
*c
); 
 588 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 589 static void unblockClientWaitingData(redisClient 
*c
); 
 590 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 591 static void vmInit(void); 
 592 static void vmMarkPagesFree(off_t page
, off_t count
); 
 593 static robj 
*vmLoadObject(robj 
*key
); 
 594 static robj 
*vmPreviewObject(robj 
*key
); 
 595 static int vmSwapOneObjectBlocking(void); 
 596 static int vmSwapOneObjectThreaded(void); 
 597 static int vmCanSwapOut(void); 
 598 static int tryFreeOneObjectFromFreelist(void); 
 599 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 600 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 601 static void vmCancelThreadedIOJob(robj 
*o
); 
 602 static void lockThreadedIO(void); 
 603 static void unlockThreadedIO(void); 
 604 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 605 static void freeIOJob(iojob 
*j
); 
 606 static void queueIOJob(iojob 
*j
); 
 607 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 608 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 609 static void waitEmptyIOJobsQueue(void); 
 610 static void vmReopenSwapFile(void); 
 611 static int vmFreePage(off_t page
); 
 612 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 613 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 614 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
); 
 615 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 616 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 617 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 618 static struct redisCommand 
*lookupCommand(char *name
); 
 619 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 620 static void resetClient(redisClient 
*c
); 
 621 static void convertToRealHash(robj 
*o
); 
 622 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
); 
 623 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
); 
 624 static void freePubsubPattern(void *p
); 
 625 static int listMatchPubsubPattern(void *a
, void *b
); 
 626 static int compareStringObjects(robj 
*a
, robj 
*b
); 
 628 static int rewriteAppendOnlyFileBackground(void); 
 630 static void authCommand(redisClient 
*c
); 
 631 static void pingCommand(redisClient 
*c
); 
 632 static void echoCommand(redisClient 
*c
); 
 633 static void setCommand(redisClient 
*c
); 
 634 static void setnxCommand(redisClient 
*c
); 
 635 static void setexCommand(redisClient 
*c
); 
 636 static void getCommand(redisClient 
*c
); 
 637 static void delCommand(redisClient 
*c
); 
 638 static void existsCommand(redisClient 
*c
); 
 639 static void incrCommand(redisClient 
*c
); 
 640 static void decrCommand(redisClient 
*c
); 
 641 static void incrbyCommand(redisClient 
*c
); 
 642 static void decrbyCommand(redisClient 
*c
); 
 643 static void selectCommand(redisClient 
*c
); 
 644 static void randomkeyCommand(redisClient 
*c
); 
 645 static void keysCommand(redisClient 
*c
); 
 646 static void dbsizeCommand(redisClient 
*c
); 
 647 static void lastsaveCommand(redisClient 
*c
); 
 648 static void saveCommand(redisClient 
*c
); 
 649 static void bgsaveCommand(redisClient 
*c
); 
 650 static void bgrewriteaofCommand(redisClient 
*c
); 
 651 static void shutdownCommand(redisClient 
*c
); 
 652 static void moveCommand(redisClient 
*c
); 
 653 static void renameCommand(redisClient 
*c
); 
 654 static void renamenxCommand(redisClient 
*c
); 
 655 static void lpushCommand(redisClient 
*c
); 
 656 static void rpushCommand(redisClient 
*c
); 
 657 static void lpopCommand(redisClient 
*c
); 
 658 static void rpopCommand(redisClient 
*c
); 
 659 static void llenCommand(redisClient 
*c
); 
 660 static void lindexCommand(redisClient 
*c
); 
 661 static void lrangeCommand(redisClient 
*c
); 
 662 static void ltrimCommand(redisClient 
*c
); 
 663 static void typeCommand(redisClient 
*c
); 
 664 static void lsetCommand(redisClient 
*c
); 
 665 static void saddCommand(redisClient 
*c
); 
 666 static void sremCommand(redisClient 
*c
); 
 667 static void smoveCommand(redisClient 
*c
); 
 668 static void sismemberCommand(redisClient 
*c
); 
 669 static void scardCommand(redisClient 
*c
); 
 670 static void spopCommand(redisClient 
*c
); 
 671 static void srandmemberCommand(redisClient 
*c
); 
 672 static void sinterCommand(redisClient 
*c
); 
 673 static void sinterstoreCommand(redisClient 
*c
); 
 674 static void sunionCommand(redisClient 
*c
); 
 675 static void sunionstoreCommand(redisClient 
*c
); 
 676 static void sdiffCommand(redisClient 
*c
); 
 677 static void sdiffstoreCommand(redisClient 
*c
); 
 678 static void syncCommand(redisClient 
*c
); 
 679 static void flushdbCommand(redisClient 
*c
); 
 680 static void flushallCommand(redisClient 
*c
); 
 681 static void sortCommand(redisClient 
*c
); 
 682 static void lremCommand(redisClient 
*c
); 
 683 static void rpoplpushcommand(redisClient 
*c
); 
 684 static void infoCommand(redisClient 
*c
); 
 685 static void mgetCommand(redisClient 
*c
); 
 686 static void monitorCommand(redisClient 
*c
); 
 687 static void expireCommand(redisClient 
*c
); 
 688 static void expireatCommand(redisClient 
*c
); 
 689 static void getsetCommand(redisClient 
*c
); 
 690 static void ttlCommand(redisClient 
*c
); 
 691 static void slaveofCommand(redisClient 
*c
); 
 692 static void debugCommand(redisClient 
*c
); 
 693 static void msetCommand(redisClient 
*c
); 
 694 static void msetnxCommand(redisClient 
*c
); 
 695 static void zaddCommand(redisClient 
*c
); 
 696 static void zincrbyCommand(redisClient 
*c
); 
 697 static void zrangeCommand(redisClient 
*c
); 
 698 static void zrangebyscoreCommand(redisClient 
*c
); 
 699 static void zcountCommand(redisClient 
*c
); 
 700 static void zrevrangeCommand(redisClient 
*c
); 
 701 static void zcardCommand(redisClient 
*c
); 
 702 static void zremCommand(redisClient 
*c
); 
 703 static void zscoreCommand(redisClient 
*c
); 
 704 static void zremrangebyscoreCommand(redisClient 
*c
); 
 705 static void multiCommand(redisClient 
*c
); 
 706 static void execCommand(redisClient 
*c
); 
 707 static void discardCommand(redisClient 
*c
); 
 708 static void blpopCommand(redisClient 
*c
); 
 709 static void brpopCommand(redisClient 
*c
); 
 710 static void appendCommand(redisClient 
*c
); 
 711 static void substrCommand(redisClient 
*c
); 
 712 static void zrankCommand(redisClient 
*c
); 
 713 static void zrevrankCommand(redisClient 
*c
); 
 714 static void hsetCommand(redisClient 
*c
); 
 715 static void hsetnxCommand(redisClient 
*c
); 
 716 static void hgetCommand(redisClient 
*c
); 
 717 static void hmsetCommand(redisClient 
*c
); 
 718 static void hmgetCommand(redisClient 
*c
); 
 719 static void hdelCommand(redisClient 
*c
); 
 720 static void hlenCommand(redisClient 
*c
); 
 721 static void zremrangebyrankCommand(redisClient 
*c
); 
 722 static void zunionCommand(redisClient 
*c
); 
 723 static void zinterCommand(redisClient 
*c
); 
 724 static void hkeysCommand(redisClient 
*c
); 
 725 static void hvalsCommand(redisClient 
*c
); 
 726 static void hgetallCommand(redisClient 
*c
); 
 727 static void hexistsCommand(redisClient 
*c
); 
 728 static void configCommand(redisClient 
*c
); 
 729 static void hincrbyCommand(redisClient 
*c
); 
 730 static void subscribeCommand(redisClient 
*c
); 
 731 static void unsubscribeCommand(redisClient 
*c
); 
 732 static void psubscribeCommand(redisClient 
*c
); 
 733 static void punsubscribeCommand(redisClient 
*c
); 
 734 static void publishCommand(redisClient 
*c
); 
 736 /*================================= Globals ================================= */ 
 739 static struct redisServer server
; /* server global state */ 
 740 static struct redisCommand cmdTable
[] = { 
 741     {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 742     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 743     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 744     {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 745     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 746     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 747     {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 748     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 749     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 750     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 751     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1}, 
 752     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 753     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 754     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 755     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 756     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 757     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 758     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 759     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 760     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 761     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 762     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 763     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 764     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1}, 
 765     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 766     {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 767     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1}, 
 768     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 769     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 770     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 771     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 772     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 773     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 774     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 775     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 776     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 777     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 778     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 779     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 780     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 781     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 782     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 783     {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 784     {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 785     {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 786     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 787     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 788     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 789     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 790     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 791     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 792     {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 793     {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 794     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 795     {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 796     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 797     {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 798     {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 799     {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 800     {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 801     {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 802     {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 803     {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 804     {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 805     {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 806     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 807     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 808     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 809     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 810     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 811     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 812     {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 813     {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 814     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 815     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 816     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 817     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 818     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 819     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 820     {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 821     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 822     {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 823     {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 824     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 825     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 826     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 827     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 828     {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 829     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 830     {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0}, 
 831     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 832     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 833     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 834     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 835     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 836     {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 837     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 838     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 839     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 840     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 841     {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 842     {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 843     {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 844     {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 845     {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 846     {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0}, 
 847     {NULL
,NULL
,0,0,NULL
,0,0,0} 
 850 /*============================ Utility functions ============================ */ 
 852 /* Glob-style pattern matching. */ 
 853 static int stringmatchlen(const char *pattern
, int patternLen
, 
 854         const char *string
, int stringLen
, int nocase
) 
 859             while (pattern
[1] == '*') { 
 864                 return 1; /* match */ 
 866                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 867                             string
, stringLen
, nocase
)) 
 868                     return 1; /* match */ 
 872             return 0; /* no match */ 
 876                 return 0; /* no match */ 
 886             not = pattern
[0] == '^'; 
 893                 if (pattern
[0] == '\\') { 
 896                     if (pattern
[0] == string
[0]) 
 898                 } else if (pattern
[0] == ']') { 
 900                 } else if (patternLen 
== 0) { 
 904                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 905                     int start 
= pattern
[0]; 
 906                     int end 
= pattern
[2]; 
 914                         start 
= tolower(start
); 
 920                     if (c 
>= start 
&& c 
<= end
) 
 924                         if (pattern
[0] == string
[0]) 
 927                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 937                 return 0; /* no match */ 
 943             if (patternLen 
>= 2) { 
 950                 if (pattern
[0] != string
[0]) 
 951                     return 0; /* no match */ 
 953                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 954                     return 0; /* no match */ 
 962         if (stringLen 
== 0) { 
 963             while(*pattern 
== '*') { 
 970     if (patternLen 
== 0 && stringLen 
== 0) 
 975 static int stringmatch(const char *pattern
, const char *string
, int nocase
) { 
 976     return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
); 
 979 /* Convert a string representing an amount of memory into the number of 
 980  * bytes, so for instance memtoll("1gb") will return 1073741824 that is 
 983  * On parsing error, if err is not NULL, *err is set to 1, otherwise it's 
 985 static long long memtoll(const char *p
, int *err
) { 
 988     long mul
; /* unit multiplier */ 
 993     /* Search the first non digit character. */ 
 996     while(*u 
&& isdigit(*u
)) u
++; 
 997     if (*u 
== '\0' || !strcasecmp(u
,"b")) { 
 999     } else if (!strcasecmp(u
,"k")) { 
1001     } else if (!strcasecmp(u
,"kb")) { 
1003     } else if (!strcasecmp(u
,"m")) { 
1005     } else if (!strcasecmp(u
,"mb")) { 
1007     } else if (!strcasecmp(u
,"g")) { 
1008         mul 
= 1000L*1000*1000; 
1009     } else if (!strcasecmp(u
,"gb")) { 
1010         mul 
= 1024L*1024*1024; 
1016     if (digits 
>= sizeof(buf
)) { 
1020     memcpy(buf
,p
,digits
); 
1022     val 
= strtoll(buf
,NULL
,10); 
1026 static void redisLog(int level
, const char *fmt
, ...) { 
1030     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
1034     if (level 
>= server
.verbosity
) { 
1040         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
1041         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
1042         vfprintf(fp
, fmt
, ap
); 
1048     if (server
.logfile
) fclose(fp
); 
1051 /*====================== Hash table type implementation  ==================== */ 
1053 /* This is a hash table type that uses the SDS dynamic strings library as 
1054  * keys and redis objects as values (objects can hold SDS strings, 
1057 static void dictVanillaFree(void *privdata
, void *val
) 
1059     DICT_NOTUSED(privdata
); 
1063 static void dictListDestructor(void *privdata
, void *val
) 
1065     DICT_NOTUSED(privdata
); 
1066     listRelease((list
*)val
); 
1069 static int sdsDictKeyCompare(void *privdata
, const void *key1
, 
1073     DICT_NOTUSED(privdata
); 
1075     l1 
= sdslen((sds
)key1
); 
1076     l2 
= sdslen((sds
)key2
); 
1077     if (l1 
!= l2
) return 0; 
1078     return memcmp(key1
, key2
, l1
) == 0; 
1081 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
1083     DICT_NOTUSED(privdata
); 
1085     if (val 
== NULL
) return; /* Values of swapped out keys as set to NULL */ 
1089 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
1092     const robj 
*o1 
= key1
, *o2 
= key2
; 
1093     return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1096 static unsigned int dictObjHash(const void *key
) { 
1097     const robj 
*o 
= key
; 
1098     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1101 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
1104     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
1107     if (o1
->encoding 
== REDIS_ENCODING_INT 
&& 
1108         o2
->encoding 
== REDIS_ENCODING_INT 
&& 
1109         o1
->ptr 
== o2
->ptr
) return 1; 
1111     o1 
= getDecodedObject(o1
); 
1112     o2 
= getDecodedObject(o2
); 
1113     cmp 
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1119 static unsigned int dictEncObjHash(const void *key
) { 
1120     robj 
*o 
= (robj
*) key
; 
1122     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
1123         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1125         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1129             len 
= snprintf(buf
,32,"%ld",(long)o
->ptr
); 
1130             return dictGenHashFunction((unsigned char*)buf
, len
); 
1134             o 
= getDecodedObject(o
); 
1135             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1142 /* Sets type and expires */ 
1143 static dictType setDictType 
= { 
1144     dictEncObjHash
,            /* hash function */ 
1147     dictEncObjKeyCompare
,      /* key compare */ 
1148     dictRedisObjectDestructor
, /* key destructor */ 
1149     NULL                       
/* val destructor */ 
1152 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1153 static dictType zsetDictType 
= { 
1154     dictEncObjHash
,            /* hash function */ 
1157     dictEncObjKeyCompare
,      /* key compare */ 
1158     dictRedisObjectDestructor
, /* key destructor */ 
1159     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1163 static dictType dbDictType 
= { 
1164     dictObjHash
,                /* hash function */ 
1167     dictObjKeyCompare
,          /* key compare */ 
1168     dictRedisObjectDestructor
,  /* key destructor */ 
1169     dictRedisObjectDestructor   
/* val destructor */ 
1173 static dictType keyptrDictType 
= { 
1174     dictObjHash
,               /* hash function */ 
1177     dictObjKeyCompare
,         /* key compare */ 
1178     dictRedisObjectDestructor
, /* key destructor */ 
1179     NULL                       
/* val destructor */ 
1182 /* Hash type hash table (note that small hashes are represented with zipmaps) */ 
1183 static dictType hashDictType 
= { 
1184     dictEncObjHash
,             /* hash function */ 
1187     dictEncObjKeyCompare
,       /* key compare */ 
1188     dictRedisObjectDestructor
,  /* key destructor */ 
1189     dictRedisObjectDestructor   
/* val destructor */ 
1192 /* Keylist hash table type has unencoded redis objects as keys and 
1193  * lists as values. It's used for blocking operations (BLPOP) and to 
1194  * map swapped keys to a list of clients waiting for these keys to be loaded. */ 
1195 static dictType keylistDictType 
= { 
1196     dictObjHash
,                /* hash function */ 
1199     dictObjKeyCompare
,          /* key compare */ 
1200     dictRedisObjectDestructor
,  /* key destructor */ 
1201     dictListDestructor          
/* val destructor */ 
1204 static void version(); 
1206 /* ========================= Random utility functions ======================= */ 
1208 /* Redis generally does not try to recover from out of memory conditions 
1209  * when allocating objects or strings, it is not clear if it will be possible 
1210  * to report this condition to the client since the networking layer itself 
1211  * is based on heap allocation for send buffers, so we simply abort. 
1212  * At least the code will be simpler to read... */ 
1213 static void oom(const char *msg
) { 
1214     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1219 /* ====================== Redis server networking stuff ===================== */ 
1220 static void closeTimedoutClients(void) { 
1223     time_t now 
= time(NULL
); 
1226     listRewind(server
.clients
,&li
); 
1227     while ((ln 
= listNext(&li
)) != NULL
) { 
1228         c 
= listNodeValue(ln
); 
1229         if (server
.maxidletime 
&& 
1230             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1231             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1232             dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */ 
1233             listLength(c
->pubsub_patterns
) == 0 && 
1234             (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1236             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1238         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1239             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1240                 addReply(c
,shared
.nullmultibulk
); 
1241                 unblockClientWaitingData(c
); 
1247 static int htNeedsResize(dict 
*dict
) { 
1248     long long size
, used
; 
1250     size 
= dictSlots(dict
); 
1251     used 
= dictSize(dict
); 
1252     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1253             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1256 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL 
1257  * we resize the hash table to save memory */ 
1258 static void tryResizeHashTables(void) { 
1261     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1262         if (htNeedsResize(server
.db
[j
].dict
)) 
1263             dictResize(server
.db
[j
].dict
); 
1264         if (htNeedsResize(server
.db
[j
].expires
)) 
1265             dictResize(server
.db
[j
].expires
); 
1269 /* Our hash table implementation performs rehashing incrementally while 
1270  * we write/read from the hash table. Still if the server is idle, the hash 
1271  * table will use two tables for a long time. So we try to use 1 millisecond 
1272  * of CPU time at every serverCron() loop in order to rehash some key. */ 
1273 static void incrementallyRehash(void) { 
1276     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1277         if (dictIsRehashing(server
.db
[j
].dict
)) { 
1278             dictRehashMilliseconds(server
.db
[j
].dict
,1); 
1279             break; /* already used our millisecond for this loop... */ 
1284 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
1285 void backgroundSaveDoneHandler(int statloc
) { 
1286     int exitcode 
= WEXITSTATUS(statloc
); 
1287     int bysignal 
= WIFSIGNALED(statloc
); 
1289     if (!bysignal 
&& exitcode 
== 0) { 
1290         redisLog(REDIS_NOTICE
, 
1291             "Background saving terminated with success"); 
1293         server
.lastsave 
= time(NULL
); 
1294     } else if (!bysignal 
&& exitcode 
!= 0) { 
1295         redisLog(REDIS_WARNING
, "Background saving error"); 
1297         redisLog(REDIS_WARNING
, 
1298             "Background saving terminated by signal %d", WTERMSIG(statloc
)); 
1299         rdbRemoveTempFile(server
.bgsavechildpid
); 
1301     server
.bgsavechildpid 
= -1; 
1302     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1303      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1304     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1307 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1309 void backgroundRewriteDoneHandler(int statloc
) { 
1310     int exitcode 
= WEXITSTATUS(statloc
); 
1311     int bysignal 
= WIFSIGNALED(statloc
); 
1313     if (!bysignal 
&& exitcode 
== 0) { 
1317         redisLog(REDIS_NOTICE
, 
1318             "Background append only file rewriting terminated with success"); 
1319         /* Now it's time to flush the differences accumulated by the parent */ 
1320         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1321         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1323             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1326         /* Flush our data... */ 
1327         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1328                 (signed) sdslen(server
.bgrewritebuf
)) { 
1329             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1333         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1334         /* Now our work is to rename the temp file into the stable file. And 
1335          * switch the file descriptor used by the server for append only. */ 
1336         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1337             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1341         /* Mission completed... almost */ 
1342         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1343         if (server
.appendfd 
!= -1) { 
1344             /* If append only is actually enabled... */ 
1345             close(server
.appendfd
); 
1346             server
.appendfd 
= fd
; 
1348             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1349             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1351             /* If append only is disabled we just generate a dump in this 
1352              * format. Why not? */ 
1355     } else if (!bysignal 
&& exitcode 
!= 0) { 
1356         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1358         redisLog(REDIS_WARNING
, 
1359             "Background append only file rewriting terminated by signal %d", 
1363     sdsfree(server
.bgrewritebuf
); 
1364     server
.bgrewritebuf 
= sdsempty(); 
1365     aofRemoveTempFile(server
.bgrewritechildpid
); 
1366     server
.bgrewritechildpid 
= -1; 
1369 /* This function is called once a background process of some kind terminates, 
1370  * as we want to avoid resizing the hash tables when there is a child in order 
1371  * to play well with copy-on-write (otherwise when a resize happens lots of 
1372  * memory pages are copied). The goal of this function is to update the ability 
1373  * for dict.c to resize the hash tables according to whether or not we have 
1374  * running children. */ 
1375 static void updateDictResizePolicy(void) { 
1376     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) 
1379         dictDisableResize(); 
1382 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1383     int j
, loops 
= server
.cronloops
++; 
1384     REDIS_NOTUSED(eventLoop
); 
1386     REDIS_NOTUSED(clientData
); 
1388     /* We take a cached value of the unix time in the global state because 
1389      * with virtual memory and aging we need to store the current time 
1390      * in objects at every object access, and accuracy is not needed. 
1391      * To access a global var is faster than calling time(NULL) */ 
1392     server
.unixtime 
= time(NULL
); 
1394     /* Show some info about non-empty databases */ 
1395     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1396         long long size
, used
, vkeys
; 
1398         size 
= dictSlots(server
.db
[j
].dict
); 
1399         used 
= dictSize(server
.db
[j
].dict
); 
1400         vkeys 
= dictSize(server
.db
[j
].expires
); 
1401         if (!(loops 
% 50) && (used 
|| vkeys
)) { 
1402             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1403             /* dictPrintStats(server.dict); */ 
1407     /* We don't want to resize the hash tables while a background saving 
1408      * is in progress: the saving child is created using fork() that is 
1409      * implemented with a copy-on-write semantic in most modern systems, so 
1410      * if we resize the HT while there is the saving child at work actually 
1411      * a lot of memory movements in the parent will cause a lot of pages 
1413     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) { 
1414         if (!(loops 
% 10)) tryResizeHashTables(); 
1415         if (server
.activerehashing
) incrementallyRehash(); 
1418     /* Show information about connected clients */ 
1419     if (!(loops 
% 50)) { 
1420         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use", 
1421             listLength(server
.clients
)-listLength(server
.slaves
), 
1422             listLength(server
.slaves
), 
1423             zmalloc_used_memory()); 
1426     /* Close connections of timedout clients */ 
1427     if ((server
.maxidletime 
&& !(loops 
% 100)) || server
.blpop_blocked_clients
) 
1428         closeTimedoutClients(); 
1430     /* Check if a background saving or AOF rewrite in progress terminated */ 
1431     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1435         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1436             if (pid 
== server
.bgsavechildpid
) { 
1437                 backgroundSaveDoneHandler(statloc
); 
1439                 backgroundRewriteDoneHandler(statloc
); 
1441             updateDictResizePolicy(); 
1444         /* If there is not a background saving in progress check if 
1445          * we have to save now */ 
1446          time_t now 
= time(NULL
); 
1447          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1448             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1450             if (server
.dirty 
>= sp
->changes 
&& 
1451                 now
-server
.lastsave 
> sp
->seconds
) { 
1452                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1453                     sp
->changes
, sp
->seconds
); 
1454                 rdbSaveBackground(server
.dbfilename
); 
1460     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1461      * will use few CPU cycles if there are few expiring keys, otherwise 
1462      * it will get more aggressive to avoid that too much memory is used by 
1463      * keys that can be removed from the keyspace. */ 
1464     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1466         redisDb 
*db 
= server
.db
+j
; 
1468         /* Continue to expire if at the end of the cycle more than 25% 
1469          * of the keys were expired. */ 
1471             long num 
= dictSize(db
->expires
); 
1472             time_t now 
= time(NULL
); 
1475             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1476                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1481                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1482                 t 
= (time_t) dictGetEntryVal(de
); 
1484                     deleteKey(db
,dictGetEntryKey(de
)); 
1486                     server
.stat_expiredkeys
++; 
1489         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1492     /* Swap a few keys on disk if we are over the memory limit and VM 
1493      * is enabled. Try to free objects from the free list first. */ 
1494     if (vmCanSwapOut()) { 
1495         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1496                 server
.vm_max_memory
) 
1500             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1501             retval 
= (server
.vm_max_threads 
== 0) ? 
1502                         vmSwapOneObjectBlocking() : 
1503                         vmSwapOneObjectThreaded(); 
1504             if (retval 
== REDIS_ERR 
&& !(loops 
% 300) && 
1505                 zmalloc_used_memory() > 
1506                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1508                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1510             /* Note that when using threaded I/O we free just one object, 
1511              * because anyway when the I/O thread in charge to swap this 
1512              * object out will finish, the handler of completed jobs 
1513              * will try to swap more objects if we are still out of memory. */ 
1514             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1518     /* Check if we should connect to a MASTER */ 
1519     if (server
.replstate 
== REDIS_REPL_CONNECT 
&& !(loops 
% 10)) { 
1520         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1521         if (syncWithMaster() == REDIS_OK
) { 
1522             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1523             if (server
.appendonly
) rewriteAppendOnlyFileBackground(); 
1529 /* This function gets called every time Redis is entering the 
1530  * main loop of the event driven library, that is, before sleeping 
1531  * for ready file descriptors. */ 
1532 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1533     REDIS_NOTUSED(eventLoop
); 
1535     /* Awake clients that got all the swapped keys they requested */ 
1536     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1540         listRewind(server
.io_ready_clients
,&li
); 
1541         while((ln 
= listNext(&li
))) { 
1542             redisClient 
*c 
= ln
->value
; 
1543             struct redisCommand 
*cmd
; 
1545             /* Resume the client. */ 
1546             listDelNode(server
.io_ready_clients
,ln
); 
1547             c
->flags 
&= (~REDIS_IO_WAIT
); 
1548             server
.vm_blocked_clients
--; 
1549             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1550                 readQueryFromClient
, c
); 
1551             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1552             assert(cmd 
!= NULL
); 
1555             /* There may be more data to process in the input buffer. */ 
1556             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1557                 processInputBuffer(c
); 
1560     /* Write the AOF buffer on disk */ 
1561     flushAppendOnlyFile(); 
1564 static void createSharedObjects(void) { 
1567     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1568     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1569     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1570     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1571     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1572     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1573     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1574     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1575     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1576     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1577     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1578     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1579         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1580     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1581         "-ERR no such key\r\n")); 
1582     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1583         "-ERR syntax error\r\n")); 
1584     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1585         "-ERR source and destination objects are the same\r\n")); 
1586     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1587         "-ERR index out of range\r\n")); 
1588     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1589     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1590     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1591     shared
.select0 
= createStringObject("select 0\r\n",10); 
1592     shared
.select1 
= createStringObject("select 1\r\n",10); 
1593     shared
.select2 
= createStringObject("select 2\r\n",10); 
1594     shared
.select3 
= createStringObject("select 3\r\n",10); 
1595     shared
.select4 
= createStringObject("select 4\r\n",10); 
1596     shared
.select5 
= createStringObject("select 5\r\n",10); 
1597     shared
.select6 
= createStringObject("select 6\r\n",10); 
1598     shared
.select7 
= createStringObject("select 7\r\n",10); 
1599     shared
.select8 
= createStringObject("select 8\r\n",10); 
1600     shared
.select9 
= createStringObject("select 9\r\n",10); 
1601     shared
.messagebulk 
= createStringObject("$7\r\nmessage\r\n",13); 
1602     shared
.pmessagebulk 
= createStringObject("$8\r\npmessage\r\n",14); 
1603     shared
.subscribebulk 
= createStringObject("$9\r\nsubscribe\r\n",15); 
1604     shared
.unsubscribebulk 
= createStringObject("$11\r\nunsubscribe\r\n",18); 
1605     shared
.psubscribebulk 
= createStringObject("$10\r\npsubscribe\r\n",17); 
1606     shared
.punsubscribebulk 
= createStringObject("$12\r\npunsubscribe\r\n",19); 
1607     shared
.mbulk3 
= createStringObject("*3\r\n",4); 
1608     shared
.mbulk4 
= createStringObject("*4\r\n",4); 
1609     for (j 
= 0; j 
< REDIS_SHARED_INTEGERS
; j
++) { 
1610         shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
); 
1611         shared
.integers
[j
]->encoding 
= REDIS_ENCODING_INT
; 
1615 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1616     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1617     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1618     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1619     server
.saveparamslen
++; 
1622 static void resetServerSaveParams() { 
1623     zfree(server
.saveparams
); 
1624     server
.saveparams 
= NULL
; 
1625     server
.saveparamslen 
= 0; 
1628 static void initServerConfig() { 
1629     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1630     server
.port 
= REDIS_SERVERPORT
; 
1631     server
.verbosity 
= REDIS_VERBOSE
; 
1632     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1633     server
.saveparams 
= NULL
; 
1634     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1635     server
.bindaddr 
= NULL
; 
1636     server
.glueoutputbuf 
= 1; 
1637     server
.daemonize 
= 0; 
1638     server
.appendonly 
= 0; 
1639     server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1640     server
.lastfsync 
= time(NULL
); 
1641     server
.appendfd 
= -1; 
1642     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1643     server
.pidfile 
= zstrdup("/var/run/redis.pid"); 
1644     server
.dbfilename 
= zstrdup("dump.rdb"); 
1645     server
.appendfilename 
= zstrdup("appendonly.aof"); 
1646     server
.requirepass 
= NULL
; 
1647     server
.rdbcompression 
= 1; 
1648     server
.activerehashing 
= 1; 
1649     server
.maxclients 
= 0; 
1650     server
.blpop_blocked_clients 
= 0; 
1651     server
.maxmemory 
= 0; 
1652     server
.vm_enabled 
= 0; 
1653     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1654     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1655     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1656     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1657     server
.vm_max_threads 
= 4; 
1658     server
.vm_blocked_clients 
= 0; 
1659     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1660     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
1662     resetServerSaveParams(); 
1664     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1665     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1666     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1667     /* Replication related */ 
1669     server
.masterauth 
= NULL
; 
1670     server
.masterhost 
= NULL
; 
1671     server
.masterport 
= 6379; 
1672     server
.master 
= NULL
; 
1673     server
.replstate 
= REDIS_REPL_NONE
; 
1675     /* Double constants initialization */ 
1677     R_PosInf 
= 1.0/R_Zero
; 
1678     R_NegInf 
= -1.0/R_Zero
; 
1679     R_Nan 
= R_Zero
/R_Zero
; 
1682 static void initServer() { 
1685     signal(SIGHUP
, SIG_IGN
); 
1686     signal(SIGPIPE
, SIG_IGN
); 
1687     setupSigSegvAction(); 
1689     server
.devnull 
= fopen("/dev/null","w"); 
1690     if (server
.devnull 
== NULL
) { 
1691         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1694     server
.clients 
= listCreate(); 
1695     server
.slaves 
= listCreate(); 
1696     server
.monitors 
= listCreate(); 
1697     server
.objfreelist 
= listCreate(); 
1698     createSharedObjects(); 
1699     server
.el 
= aeCreateEventLoop(); 
1700     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1701     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1702     if (server
.fd 
== -1) { 
1703         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1706     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1707         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1708         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1709         server
.db
[j
].blockingkeys 
= dictCreate(&keylistDictType
,NULL
); 
1710         if (server
.vm_enabled
) 
1711             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1712         server
.db
[j
].id 
= j
; 
1714     server
.pubsub_channels 
= dictCreate(&keylistDictType
,NULL
); 
1715     server
.pubsub_patterns 
= listCreate(); 
1716     listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
); 
1717     listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
); 
1718     server
.cronloops 
= 0; 
1719     server
.bgsavechildpid 
= -1; 
1720     server
.bgrewritechildpid 
= -1; 
1721     server
.bgrewritebuf 
= sdsempty(); 
1722     server
.aofbuf 
= sdsempty(); 
1723     server
.lastsave 
= time(NULL
); 
1725     server
.stat_numcommands 
= 0; 
1726     server
.stat_numconnections 
= 0; 
1727     server
.stat_expiredkeys 
= 0; 
1728     server
.stat_starttime 
= time(NULL
); 
1729     server
.unixtime 
= time(NULL
); 
1730     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1731     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1732         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1734     if (server
.appendonly
) { 
1735         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1736         if (server
.appendfd 
== -1) { 
1737             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1743     if (server
.vm_enabled
) vmInit(); 
1746 /* Empty the whole database */ 
1747 static long long emptyDb() { 
1749     long long removed 
= 0; 
1751     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1752         removed 
+= dictSize(server
.db
[j
].dict
); 
1753         dictEmpty(server
.db
[j
].dict
); 
1754         dictEmpty(server
.db
[j
].expires
); 
/* Convert a "yes"/"no" string (compared case insensitively) into 1/0.
 * Any other string yields -1. */
static int yesnotoi(char *s) {
    int value = -1;

    if (strcasecmp(s,"yes") == 0)
        value = 1;
    else if (strcasecmp(s,"no") == 0)
        value = 0;
    return value;
}
1765 /* I agree, this is a very rudimental way to load a configuration... 
1766    will improve later if the config gets more complex */ 
1767 static void loadServerConfig(char *filename
) { 
1769     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1773     if (filename
[0] == '-' && filename
[1] == '\0') 
1776         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1777             redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
); 
1782     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1788         line 
= sdstrim(line
," \t\r\n"); 
1790         /* Skip comments and blank lines*/ 
1791         if (line
[0] == '#' || line
[0] == '\0') { 
1796         /* Split into arguments */ 
1797         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1798         sdstolower(argv
[0]); 
1800         /* Execute config directives */ 
1801         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1802             server
.maxidletime 
= atoi(argv
[1]); 
1803             if (server
.maxidletime 
< 0) { 
1804                 err 
= "Invalid timeout value"; goto loaderr
; 
1806         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1807             server
.port 
= atoi(argv
[1]); 
1808             if (server
.port 
< 1 || server
.port 
> 65535) { 
1809                 err 
= "Invalid port"; goto loaderr
; 
1811         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1812             server
.bindaddr 
= zstrdup(argv
[1]); 
1813         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1814             int seconds 
= atoi(argv
[1]); 
1815             int changes 
= atoi(argv
[2]); 
1816             if (seconds 
< 1 || changes 
< 0) { 
1817                 err 
= "Invalid save parameters"; goto loaderr
; 
1819             appendServerSaveParams(seconds
,changes
); 
1820         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1821             if (chdir(argv
[1]) == -1) { 
1822                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1823                     argv
[1], strerror(errno
)); 
1826         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1827             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1828             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1829             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1830             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1832                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1835         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1838             server
.logfile 
= zstrdup(argv
[1]); 
1839             if (!strcasecmp(server
.logfile
,"stdout")) { 
1840                 zfree(server
.logfile
); 
1841                 server
.logfile 
= NULL
; 
1843             if (server
.logfile
) { 
1844                 /* Test if we are able to open the file. The server will not 
1845                  * be able to abort just for this problem later... */ 
1846                 logfp 
= fopen(server
.logfile
,"a"); 
1847                 if (logfp 
== NULL
) { 
1848                     err 
= sdscatprintf(sdsempty(), 
1849                         "Can't open the log file: %s", strerror(errno
)); 
1854         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1855             server
.dbnum 
= atoi(argv
[1]); 
1856             if (server
.dbnum 
< 1) { 
1857                 err 
= "Invalid number of databases"; goto loaderr
; 
1859         } else if (!strcasecmp(argv
[0],"include") && argc 
== 2) { 
1860             loadServerConfig(argv
[1]); 
1861         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1862             server
.maxclients 
= atoi(argv
[1]); 
1863         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1864             server
.maxmemory 
= memtoll(argv
[1],NULL
); 
1865         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1866             server
.masterhost 
= sdsnew(argv
[1]); 
1867             server
.masterport 
= atoi(argv
[2]); 
1868             server
.replstate 
= REDIS_REPL_CONNECT
; 
1869         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1870                 server
.masterauth 
= zstrdup(argv
[1]); 
1871         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1872             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1873                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1875         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1876             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1877                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1879         } else if (!strcasecmp(argv
[0],"activerehashing") && argc 
== 2) { 
1880             if ((server
.activerehashing 
= yesnotoi(argv
[1])) == -1) { 
1881                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1883         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1884             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1885                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1887         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1888             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1889                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1891         } else if (!strcasecmp(argv
[0],"appendfilename") && argc 
== 2) { 
1892             zfree(server
.appendfilename
); 
1893             server
.appendfilename 
= zstrdup(argv
[1]); 
1894         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
1895             if (!strcasecmp(argv
[1],"no")) { 
1896                 server
.appendfsync 
= APPENDFSYNC_NO
; 
1897             } else if (!strcasecmp(argv
[1],"always")) { 
1898                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1899             } else if (!strcasecmp(argv
[1],"everysec")) { 
1900                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1902                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
1905         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
1906             server
.requirepass 
= zstrdup(argv
[1]); 
1907         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
1908             zfree(server
.pidfile
); 
1909             server
.pidfile 
= zstrdup(argv
[1]); 
1910         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
1911             zfree(server
.dbfilename
); 
1912             server
.dbfilename 
= zstrdup(argv
[1]); 
1913         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
1914             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
1915                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1917         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
1918             zfree(server
.vm_swap_file
); 
1919             server
.vm_swap_file 
= zstrdup(argv
[1]); 
1920         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
1921             server
.vm_max_memory 
= memtoll(argv
[1],NULL
); 
1922         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
1923             server
.vm_page_size 
= memtoll(argv
[1], NULL
); 
1924         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
1925             server
.vm_pages 
= memtoll(argv
[1], NULL
); 
1926         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1927             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1928         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
1929             server
.hash_max_zipmap_entries 
= memtoll(argv
[1], NULL
); 
1930         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
1931             server
.hash_max_zipmap_value 
= memtoll(argv
[1], NULL
); 
1933             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
1935         for (j 
= 0; j 
< argc
; j
++) 
1940     if (fp 
!= stdin
) fclose(fp
); 
1944     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
1945     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
1946     fprintf(stderr
, ">>> '%s'\n", line
); 
1947     fprintf(stderr
, "%s\n", err
); 
1951 static void freeClientArgv(redisClient 
*c
) { 
1954     for (j 
= 0; j 
< c
->argc
; j
++) 
1955         decrRefCount(c
->argv
[j
]); 
1956     for (j 
= 0; j 
< c
->mbargc
; j
++) 
1957         decrRefCount(c
->mbargv
[j
]); 
1962 static void freeClient(redisClient 
*c
) { 
1965     /* Note that if the client we are freeing is blocked into a blocking 
1966      * call, we have to set querybuf to NULL *before* to call 
1967      * unblockClientWaitingData() to avoid processInputBuffer() will get 
1968      * called. Also it is important to remove the file events after 
1969      * this, because this call adds the READABLE event. */ 
1970     sdsfree(c
->querybuf
); 
1972     if (c
->flags 
& REDIS_BLOCKED
) 
1973         unblockClientWaitingData(c
); 
1975     /* Unsubscribe from all the pubsub channels */ 
1976     pubsubUnsubscribeAllChannels(c
,0); 
1977     pubsubUnsubscribeAllPatterns(c
,0); 
1978     dictRelease(c
->pubsub_channels
); 
1979     listRelease(c
->pubsub_patterns
); 
1980     /* Obvious cleanup */ 
1981     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
1982     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
1983     listRelease(c
->reply
); 
1986     /* Remove from the list of clients */ 
1987     ln 
= listSearchKey(server
.clients
,c
); 
1988     redisAssert(ln 
!= NULL
); 
1989     listDelNode(server
.clients
,ln
); 
1990     /* Remove from the list of clients waiting for swapped keys */ 
1991     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
1992         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
1994             listDelNode(server
.io_ready_clients
,ln
); 
1995             server
.vm_blocked_clients
--; 
1998     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
1999         ln 
= listFirst(c
->io_keys
); 
2000         dontWaitForSwappedKey(c
,ln
->value
); 
2002     listRelease(c
->io_keys
); 
2003     /* Master/slave cleanup */ 
2004     if (c
->flags 
& REDIS_SLAVE
) { 
2005         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
2007         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
2008         ln 
= listSearchKey(l
,c
); 
2009         redisAssert(ln 
!= NULL
); 
2012     if (c
->flags 
& REDIS_MASTER
) { 
2013         server
.master 
= NULL
; 
2014         server
.replstate 
= REDIS_REPL_CONNECT
; 
2016     /* Release memory */ 
2019     freeClientMultiState(c
); 
2023 #define GLUEREPLY_UP_TO (1024) 
2024 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
2026     char buf
[GLUEREPLY_UP_TO
]; 
2031     listRewind(c
->reply
,&li
); 
2032     while((ln 
= listNext(&li
))) { 
2036         objlen 
= sdslen(o
->ptr
); 
2037         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
2038             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
2040             listDelNode(c
->reply
,ln
); 
2042             if (copylen 
== 0) return; 
2046     /* Now the output buffer is empty, add the new single element */ 
2047     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
2048     listAddNodeHead(c
->reply
,o
); 
2051 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2052     redisClient 
*c 
= privdata
; 
2053     int nwritten 
= 0, totwritten 
= 0, objlen
; 
2056     REDIS_NOTUSED(mask
); 
2058     /* Use writev() if we have enough buffers to send */ 
2059     if (!server
.glueoutputbuf 
&& 
2060         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&& 
2061         !(c
->flags 
& REDIS_MASTER
)) 
2063         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
2067     while(listLength(c
->reply
)) { 
2068         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
2069             glueReplyBuffersIfNeeded(c
); 
2071         o 
= listNodeValue(listFirst(c
->reply
)); 
2072         objlen 
= sdslen(o
->ptr
); 
2075             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2079         if (c
->flags 
& REDIS_MASTER
) { 
2080             /* Don't reply to a master */ 
2081             nwritten 
= objlen 
- c
->sentlen
; 
2083             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
2084             if (nwritten 
<= 0) break; 
2086         c
->sentlen 
+= nwritten
; 
2087         totwritten 
+= nwritten
; 
2088         /* If we fully sent the object on head go to the next one */ 
2089         if (c
->sentlen 
== objlen
) { 
2090             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2093         /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT 
2094          * bytes, in a single threaded server it's a good idea to serve 
2095          * other clients as well, even if a very large request comes from 
2096          * super fast link that is always able to accept data (in real world 
2097          * scenario think about 'KEYS *' against the loopback interfae) */ 
2098         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
2100     if (nwritten 
== -1) { 
2101         if (errno 
== EAGAIN
) { 
2104             redisLog(REDIS_VERBOSE
, 
2105                 "Error writing to client: %s", strerror(errno
)); 
2110     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
2111     if (listLength(c
->reply
) == 0) { 
2113         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2117 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
2119     redisClient 
*c 
= privdata
; 
2120     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
2122     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
2123     int offset
, ion 
= 0; 
2125     REDIS_NOTUSED(mask
); 
2128     while (listLength(c
->reply
)) { 
2129         offset 
= c
->sentlen
; 
2133         /* fill-in the iov[] array */ 
2134         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
2135             o 
= listNodeValue(node
); 
2136             objlen 
= sdslen(o
->ptr
); 
2138             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
) 
2141             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
2142                 break; /* no more iovecs */ 
2144             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
2145             iov
[ion
].iov_len 
= objlen 
- offset
; 
2146             willwrite 
+= objlen 
- offset
; 
2147             offset 
= 0; /* just for the first item */ 
2154         /* write all collected blocks at once */ 
2155         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
2156             if (errno 
!= EAGAIN
) { 
2157                 redisLog(REDIS_VERBOSE
, 
2158                          "Error writing to client: %s", strerror(errno
)); 
2165         totwritten 
+= nwritten
; 
2166         offset 
= c
->sentlen
; 
2168         /* remove written robjs from c->reply */ 
2169         while (nwritten 
&& listLength(c
->reply
)) { 
2170             o 
= listNodeValue(listFirst(c
->reply
)); 
2171             objlen 
= sdslen(o
->ptr
); 
2173             if(nwritten 
>= objlen 
- offset
) { 
2174                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
2175                 nwritten 
-= objlen 
- offset
; 
2179                 c
->sentlen 
+= nwritten
; 
2187         c
->lastinteraction 
= time(NULL
); 
2189     if (listLength(c
->reply
) == 0) { 
2191         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2195 static struct redisCommand 
*lookupCommand(char *name
) { 
2197     while(cmdTable
[j
].name 
!= NULL
) { 
2198         if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
]; 
2204 /* resetClient prepare the client to process the next command */ 
2205 static void resetClient(redisClient 
*c
) { 
2211 /* Call() is the core of Redis execution of a command */ 
2212 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2215     dirty 
= server
.dirty
; 
2217     dirty 
= server
.dirty
-dirty
; 
2219     if (server
.appendonly 
&& dirty
) 
2220         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2221     if ((dirty 
|| cmd
->flags 
& REDIS_CMD_FORCE_REPLICATION
) && 
2222         listLength(server
.slaves
)) 
2223         replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
); 
2224     if (listLength(server
.monitors
)) 
2225         replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
); 
2226     server
.stat_numcommands
++; 
2229 /* If this function gets called we already read a whole 
2230  * command, argments are in the client argv/argc fields. 
2231  * processCommand() execute the command or prepare the 
2232  * server for a bulk read from the client. 
2234  * If 1 is returned the client is still alive and valid and 
2235  * and other operations can be performed by the caller. Otherwise 
2236  * if 0 is returned the client was destroied (i.e. after QUIT). */ 
2237 static int processCommand(redisClient 
*c
) { 
2238     struct redisCommand 
*cmd
; 
2240     /* Free some memory if needed (maxmemory setting) */ 
2241     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2243     /* Handle the multi bulk command type. This is an alternative protocol 
2244      * supported by Redis in order to receive commands that are composed of 
2245      * multiple binary-safe "bulk" arguments. The latency of processing is 
2246      * a bit higher but this allows things like multi-sets, so if this 
2247      * protocol is used only for MSET and similar commands this is a big win. */ 
2248     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2249         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2250         if (c
->multibulk 
<= 0) { 
2254             decrRefCount(c
->argv
[c
->argc
-1]); 
2258     } else if (c
->multibulk
) { 
2259         if (c
->bulklen 
== -1) { 
2260             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2261                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2265                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2266                 decrRefCount(c
->argv
[0]); 
2267                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2269                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2274                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2278             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2279             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2283             if (c
->multibulk 
== 0) { 
2287                 /* Here we need to swap the multi-bulk argc/argv with the 
2288                  * normal argc/argv of the client structure. */ 
2290                 c
->argv 
= c
->mbargv
; 
2291                 c
->mbargv 
= auxargv
; 
2294                 c
->argc 
= c
->mbargc
; 
2295                 c
->mbargc 
= auxargc
; 
2297                 /* We need to set bulklen to something different than -1 
2298                  * in order for the code below to process the command without 
2299                  * to try to read the last argument of a bulk command as 
2300                  * a special argument. */ 
2302                 /* continue below and process the command */ 
2309     /* -- end of multi bulk commands processing -- */ 
2311     /* The QUIT command is handled as a special case. Normal command 
2312      * procs are unable to close the client connection safely */ 
2313     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2318     /* Now lookup the command and check ASAP about trivial error conditions 
2319      * such wrong arity, bad command name and so forth. */ 
2320     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2323             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2324                 (char*)c
->argv
[0]->ptr
)); 
2327     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2328                (c
->argc 
< -cmd
->arity
)) { 
2330             sdscatprintf(sdsempty(), 
2331                 "-ERR wrong number of arguments for '%s' command\r\n", 
2335     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2336         /* This is a bulk command, we have to read the last argument yet. */ 
2337         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2339         decrRefCount(c
->argv
[c
->argc
-1]); 
2340         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2342             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2347         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2348         /* It is possible that the bulk read is already in the 
2349          * buffer. Check this condition and handle it accordingly. 
2350          * This is just a fast path, alternative to call processInputBuffer(). 
2351          * It's a good idea since the code is small and this condition 
2352          * happens most of the times. */ 
2353         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2354             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2356             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2358             /* Otherwise return... there is to read the last argument 
2359              * from the socket. */ 
2363     /* Let's try to encode the bulk object to save space. */ 
2364     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2365         c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2367     /* Check if the user is authenticated */ 
2368     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2369         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2374     /* Handle the maxmemory directive */ 
2375     if (server
.maxmemory 
&& (cmd
->flags 
& REDIS_CMD_DENYOOM
) && 
2376         zmalloc_used_memory() > server
.maxmemory
) 
2378         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2383     /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ 
2384     if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0) 
2386         cmd
->proc 
!= subscribeCommand 
&& cmd
->proc 
!= unsubscribeCommand 
&& 
2387         cmd
->proc 
!= psubscribeCommand 
&& cmd
->proc 
!= punsubscribeCommand
) { 
2388         addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n")); 
2393     /* Exec the command */ 
2394     if (c
->flags 
& REDIS_MULTI 
&& cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand
) { 
2395         queueMultiCommand(c
,cmd
); 
2396         addReply(c
,shared
.queued
); 
2398         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2399             blockClientOnSwappedKeys(cmd
,c
)) return 1; 
2403     /* Prepare the client for the next command */ 
2408 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
) { 
2413     /* We need 1+(ARGS*3) objects since commands are using the new protocol 
2414      * and we one 1 object for the first "*<count>\r\n" multibulk count, then 
2415      * for every additional object we have "$<count>\r\n" + object + "\r\n". */ 
2416     robj 
*static_outv
[REDIS_STATIC_ARGS
*3+1]; 
2419     if (argc 
<= REDIS_STATIC_ARGS
) { 
2422         outv 
= zmalloc(sizeof(robj
*)*(argc
*3+1)); 
2425     lenobj 
= createObject(REDIS_STRING
, 
2426             sdscatprintf(sdsempty(), "*%d\r\n", argc
)); 
2427     lenobj
->refcount 
= 0; 
2428     outv
[outc
++] = lenobj
; 
2429     for (j 
= 0; j 
< argc
; j
++) { 
2430         lenobj 
= createObject(REDIS_STRING
, 
2431             sdscatprintf(sdsempty(),"$%lu\r\n", 
2432                 (unsigned long) stringObjectLen(argv
[j
]))); 
2433         lenobj
->refcount 
= 0; 
2434         outv
[outc
++] = lenobj
; 
2435         outv
[outc
++] = argv
[j
]; 
2436         outv
[outc
++] = shared
.crlf
; 
2439     /* Increment all the refcounts at start and decrement at end in order to 
2440      * be sure to free objects if there is no slave in a replication state 
2441      * able to be feed with commands */ 
2442     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2443     listRewind(slaves
,&li
); 
2444     while((ln 
= listNext(&li
))) { 
2445         redisClient 
*slave 
= ln
->value
; 
2447         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2448         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2450         /* Feed all the other slaves, MONITORs and so on */ 
2451         if (slave
->slaveseldb 
!= dictid
) { 
2455             case 0: selectcmd 
= shared
.select0
; break; 
2456             case 1: selectcmd 
= shared
.select1
; break; 
2457             case 2: selectcmd 
= shared
.select2
; break; 
2458             case 3: selectcmd 
= shared
.select3
; break; 
2459             case 4: selectcmd 
= shared
.select4
; break; 
2460             case 5: selectcmd 
= shared
.select5
; break; 
2461             case 6: selectcmd 
= shared
.select6
; break; 
2462             case 7: selectcmd 
= shared
.select7
; break; 
2463             case 8: selectcmd 
= shared
.select8
; break; 
2464             case 9: selectcmd 
= shared
.select9
; break; 
2466                 selectcmd 
= createObject(REDIS_STRING
, 
2467                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2468                 selectcmd
->refcount 
= 0; 
2471             addReply(slave
,selectcmd
); 
2472             slave
->slaveseldb 
= dictid
; 
2474         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2476     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2477     if (outv 
!= static_outv
) zfree(outv
); 
2480 static sds 
sdscatrepr(sds s
, char *p
, size_t len
) { 
2481     s 
= sdscatlen(s
,"\"",1); 
2486             s 
= sdscatprintf(s
,"\\%c",*p
); 
2488         case '\n': s 
= sdscatlen(s
,"\\n",1); break; 
2489         case '\r': s 
= sdscatlen(s
,"\\r",1); break; 
2490         case '\t': s 
= sdscatlen(s
,"\\t",1); break; 
2491         case '\a': s 
= sdscatlen(s
,"\\a",1); break; 
2492         case '\b': s 
= sdscatlen(s
,"\\b",1); break; 
2495                 s 
= sdscatprintf(s
,"%c",*p
); 
2497                 s 
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
); 
2502     return sdscatlen(s
,"\"",1); 
2505 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
) { 
2509     sds cmdrepr 
= sdsnew("+"); 
2513     gettimeofday(&tv
,NULL
); 
2514     cmdrepr 
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
); 
2515     if (dictid 
!= 0) cmdrepr 
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
); 
2517     for (j 
= 0; j 
< argc
; j
++) { 
2518         if (argv
[j
]->encoding 
== REDIS_ENCODING_INT
) { 
2519             cmdrepr 
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
); 
2521             cmdrepr 
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
, 
2522                         sdslen(argv
[j
]->ptr
)); 
2525             cmdrepr 
= sdscatlen(cmdrepr
," ",1); 
2527     cmdrepr 
= sdscatlen(cmdrepr
,"\r\n",2); 
2528     cmdobj 
= createObject(REDIS_STRING
,cmdrepr
); 
2530     listRewind(monitors
,&li
); 
2531     while((ln 
= listNext(&li
))) { 
2532         redisClient 
*monitor 
= ln
->value
; 
2533         addReply(monitor
,cmdobj
); 
2535     decrRefCount(cmdobj
); 
2538 static void processInputBuffer(redisClient 
*c
) { 
2540     /* Before to process the input buffer, make sure the client is not 
2541      * waitig for a blocking operation such as BLPOP. Note that the first 
2542      * iteration the client is never blocked, otherwise the processInputBuffer 
2543      * would not be called at all, but after the execution of the first commands 
2544      * in the input buffer the client may be blocked, and the "goto again" 
2545      * will try to reiterate. The following line will make it return asap. */ 
2546     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2547     if (c
->bulklen 
== -1) { 
2548         /* Read the first line of the query */ 
2549         char *p 
= strchr(c
->querybuf
,'\n'); 
2556             query 
= c
->querybuf
; 
2557             c
->querybuf 
= sdsempty(); 
2558             querylen 
= 1+(p
-(query
)); 
2559             if (sdslen(query
) > querylen
) { 
2560                 /* leave data after the first line of the query in the buffer */ 
2561                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2563             *p 
= '\0'; /* remove "\n" */ 
2564             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2565             sdsupdatelen(query
); 
2567             /* Now we can split the query in arguments */ 
2568             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2571             if (c
->argv
) zfree(c
->argv
); 
2572             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2574             for (j 
= 0; j 
< argc
; j
++) { 
2575                 if (sdslen(argv
[j
])) { 
2576                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2584                 /* Execute the command. If the client is still valid 
2585                  * after processCommand() return and there is something 
2586                  * on the query buffer try to process the next command. */ 
2587                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2589                 /* Nothing to process, argc == 0. Just process the query 
2590                  * buffer if it's not empty or return to the caller */ 
2591                 if (sdslen(c
->querybuf
)) goto again
; 
2594         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2595             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2600         /* Bulk read handling. Note that if we are at this point 
2601            the client already sent a command terminated with a newline, 
2602            we are reading the bulk data that is actually the last 
2603            argument of the command. */ 
2604         int qbl 
= sdslen(c
->querybuf
); 
2606         if (c
->bulklen 
<= qbl
) { 
2607             /* Copy everything but the final CRLF as final argument */ 
2608             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2610             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2611             /* Process the command. If the client is still valid after 
2612              * the processing and there is more data in the buffer 
2613              * try to parse it. */ 
2614             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2620 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2621     redisClient 
*c 
= (redisClient
*) privdata
; 
2622     char buf
[REDIS_IOBUF_LEN
]; 
2625     REDIS_NOTUSED(mask
); 
2627     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2629         if (errno 
== EAGAIN
) { 
2632             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2636     } else if (nread 
== 0) { 
2637         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2642         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2643         c
->lastinteraction 
= time(NULL
); 
2647     processInputBuffer(c
); 
2650 static int selectDb(redisClient 
*c
, int id
) { 
2651     if (id 
< 0 || id 
>= server
.dbnum
) 
2653     c
->db 
= &server
.db
[id
]; 
2657 static void *dupClientReplyValue(void *o
) { 
2658     incrRefCount((robj
*)o
); 
/* Match method for lists of string objects: non-zero when
 * compareStringObjects() reports 'a' and 'b' as equal. */
static int listMatchObjects(void *a, void *b) {
    int cmp = compareStringObjects(a,b);

    return cmp == 0;
}
2666 static redisClient 
*createClient(int fd
) { 
2667     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2669     anetNonBlock(NULL
,fd
); 
2670     anetTcpNoDelay(NULL
,fd
); 
2671     if (!c
) return NULL
; 
2674     c
->querybuf 
= sdsempty(); 
2683     c
->lastinteraction 
= time(NULL
); 
2684     c
->authenticated 
= 0; 
2685     c
->replstate 
= REDIS_REPL_NONE
; 
2686     c
->reply 
= listCreate(); 
2687     listSetFreeMethod(c
->reply
,decrRefCount
); 
2688     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2689     c
->blockingkeys 
= NULL
; 
2690     c
->blockingkeysnum 
= 0; 
2691     c
->io_keys 
= listCreate(); 
2692     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2693     c
->pubsub_channels 
= dictCreate(&setDictType
,NULL
); 
2694     c
->pubsub_patterns 
= listCreate(); 
2695     listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
); 
2696     listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
); 
2697     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2698         readQueryFromClient
, c
) == AE_ERR
) { 
2702     listAddNodeTail(server
.clients
,c
); 
2703     initClientMultiState(c
); 
2707 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2708     if (listLength(c
->reply
) == 0 && 
2709         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2710          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2711         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2712         sendReplyToClient
, c
) == AE_ERR
) return; 
2714     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2715         obj 
= dupStringObject(obj
); 
2716         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2718     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2721 static void addReplySds(redisClient 
*c
, sds s
) { 
2722     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2727 static void addReplyDouble(redisClient 
*c
, double d
) { 
2730     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2731     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2732         (unsigned long) strlen(buf
),buf
)); 
2735 static void addReplyLong(redisClient 
*c
, long l
) { 
2740         addReply(c
,shared
.czero
); 
2742     } else if (l 
== 1) { 
2743         addReply(c
,shared
.cone
); 
2746     len 
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
); 
2747     addReplySds(c
,sdsnewlen(buf
,len
)); 
2750 static void addReplyLongLong(redisClient 
*c
, long long ll
) { 
2755         addReply(c
,shared
.czero
); 
2757     } else if (ll 
== 1) { 
2758         addReply(c
,shared
.cone
); 
2761     len 
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
); 
2762     addReplySds(c
,sdsnewlen(buf
,len
)); 
2765 static void addReplyUlong(redisClient 
*c
, unsigned long ul
) { 
2770         addReply(c
,shared
.czero
); 
2772     } else if (ul 
== 1) { 
2773         addReply(c
,shared
.cone
); 
2776     len 
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
); 
2777     addReplySds(c
,sdsnewlen(buf
,len
)); 
2780 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2783     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2784         len 
= sdslen(obj
->ptr
); 
2786         long n 
= (long)obj
->ptr
; 
2788         /* Compute how many bytes will take this integer as a radix 10 string */ 
2794         while((n 
= n
/10) != 0) { 
2798     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
)); 
2801 static void addReplyBulk(redisClient 
*c
, robj 
*obj
) { 
2802     addReplyBulkLen(c
,obj
); 
2804     addReply(c
,shared
.crlf
); 
2807 /* In the CONFIG command we need to add vanilla C string as bulk replies */ 
2808 static void addReplyBulkCString(redisClient 
*c
, char *s
) { 
2810         addReply(c
,shared
.nullbulk
); 
2812         robj 
*o 
= createStringObject(s
,strlen(s
)); 
2818 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2823     REDIS_NOTUSED(mask
); 
2824     REDIS_NOTUSED(privdata
); 
2826     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2827     if (cfd 
== AE_ERR
) { 
2828         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2831     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2832     if ((c 
= createClient(cfd
)) == NULL
) { 
2833         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2834         close(cfd
); /* May be already closed, just ingore errors */ 
2837     /* If maxclient directive is set and this is one client more... close the 
2838      * connection. Note that we create the client instead to check before 
2839      * for this condition, since now the socket is already set in nonblocking 
2840      * mode and we can send an error for free using the Kernel I/O */ 
2841     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2842         char *err 
= "-ERR max number of clients reached\r\n"; 
2844         /* That's a best effort error message, don't check write errors */ 
2845         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2846             /* Nothing to do, Just to avoid the warning... */ 
2851     server
.stat_numconnections
++; 
2854 /* ======================= Redis objects implementation ===================== */ 
2856 static robj 
*createObject(int type
, void *ptr
) { 
2859     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2860     if (listLength(server
.objfreelist
)) { 
2861         listNode 
*head 
= listFirst(server
.objfreelist
); 
2862         o 
= listNodeValue(head
); 
2863         listDelNode(server
.objfreelist
,head
); 
2864         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2866         if (server
.vm_enabled
) { 
2867             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2868             o 
= zmalloc(sizeof(*o
)); 
2870             o 
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
)); 
2874     o
->encoding 
= REDIS_ENCODING_RAW
; 
2877     if (server
.vm_enabled
) { 
2878         /* Note that this code may run in the context of an I/O thread 
2879          * and accessing to server.unixtime in theory is an error 
2880          * (no locks). But in practice this is safe, and even if we read 
2881          * garbage Redis will not fail, as it's just a statistical info */ 
2882         o
->vm
.atime 
= server
.unixtime
; 
2883         o
->storage 
= REDIS_VM_MEMORY
; 
2888 static robj 
*createStringObject(char *ptr
, size_t len
) { 
2889     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
2892 static robj 
*createStringObjectFromLongLong(long long value
) { 
2894     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
2895         incrRefCount(shared
.integers
[value
]); 
2896         o 
= shared
.integers
[value
]; 
2898         o 
= createObject(REDIS_STRING
, NULL
); 
2899         if (value 
>= LONG_MIN 
&& value 
<= LONG_MAX
) { 
2900             o
->encoding 
= REDIS_ENCODING_INT
; 
2901             o
->ptr 
= (void*)((long)value
); 
2903             o
->ptr 
= sdscatprintf(sdsempty(),"%lld",value
); 
2909 static robj 
*dupStringObject(robj 
*o
) { 
2910     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
2911     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
2914 static robj 
*createListObject(void) { 
2915     list 
*l 
= listCreate(); 
2917     listSetFreeMethod(l
,decrRefCount
); 
2918     return createObject(REDIS_LIST
,l
); 
2921 static robj 
*createSetObject(void) { 
2922     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
2923     return createObject(REDIS_SET
,d
); 
2926 static robj 
*createHashObject(void) { 
2927     /* All the Hashes start as zipmaps. Will be automatically converted 
2928      * into hash tables if there are enough elements or big elements 
2930     unsigned char *zm 
= zipmapNew(); 
2931     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
2932     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
2936 static robj 
*createZsetObject(void) { 
2937     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
2939     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
2940     zs
->zsl 
= zslCreate(); 
2941     return createObject(REDIS_ZSET
,zs
); 
2944 static void freeStringObject(robj 
*o
) { 
2945     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2950 static void freeListObject(robj 
*o
) { 
2951     listRelease((list
*) o
->ptr
); 
2954 static void freeSetObject(robj 
*o
) { 
2955     dictRelease((dict
*) o
->ptr
); 
2958 static void freeZsetObject(robj 
*o
) { 
2961     dictRelease(zs
->dict
); 
2966 static void freeHashObject(robj 
*o
) { 
2967     switch (o
->encoding
) { 
2968     case REDIS_ENCODING_HT
: 
2969         dictRelease((dict
*) o
->ptr
); 
2971     case REDIS_ENCODING_ZIPMAP
: 
2975         redisPanic("Unknown hash encoding type"); 
2980 static void incrRefCount(robj 
*o
) { 
2984 static void decrRefCount(void *obj
) { 
2987     if (o
->refcount 
<= 0) redisPanic("decrRefCount against refcount <= 0"); 
2988     /* Object is a key of a swapped out value, or in the process of being 
2990     if (server
.vm_enabled 
&& 
2991         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
2993         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
); 
2994         redisAssert(o
->type 
== REDIS_STRING
); 
2995         freeStringObject(o
); 
2996         vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
); 
2997         pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2998         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
2999             !listAddNodeHead(server
.objfreelist
,o
)) 
3001         pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3002         server
.vm_stats_swapped_objects
--; 
3005     /* Object is in memory, or in the process of being swapped out. */ 
3006     if (--(o
->refcount
) == 0) { 
3007         if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
3008             vmCancelThreadedIOJob(obj
); 
3010         case REDIS_STRING
: freeStringObject(o
); break; 
3011         case REDIS_LIST
: freeListObject(o
); break; 
3012         case REDIS_SET
: freeSetObject(o
); break; 
3013         case REDIS_ZSET
: freeZsetObject(o
); break; 
3014         case REDIS_HASH
: freeHashObject(o
); break; 
3015         default: redisPanic("Unknown object type"); break; 
3017         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
3018         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
3019             !listAddNodeHead(server
.objfreelist
,o
)) 
3021         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3025 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
3026     dictEntry 
*de 
= dictFind(db
->dict
,key
); 
3028         robj 
*key 
= dictGetEntryKey(de
); 
3029         robj 
*val 
= dictGetEntryVal(de
); 
3031         if (server
.vm_enabled
) { 
3032             if (key
->storage 
== REDIS_VM_MEMORY 
|| 
3033                 key
->storage 
== REDIS_VM_SWAPPING
) 
3035                 /* If we were swapping the object out, stop it, this key 
3037                 if (key
->storage 
== REDIS_VM_SWAPPING
) 
3038                     vmCancelThreadedIOJob(key
); 
3039                 /* Update the access time of the key for the aging algorithm. */ 
3040                 key
->vm
.atime 
= server
.unixtime
; 
3042                 int notify 
= (key
->storage 
== REDIS_VM_LOADING
); 
3044                 /* Our value was swapped on disk. Bring it at home. */ 
3045                 redisAssert(val 
== NULL
); 
3046                 val 
= vmLoadObject(key
); 
3047                 dictGetEntryVal(de
) = val
; 
3049                 /* Clients blocked by the VM subsystem may be waiting for 
3051                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
3060 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
3061     expireIfNeeded(db
,key
); 
3062     return lookupKey(db
,key
); 
3065 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
3066     deleteIfVolatile(db
,key
); 
3067     return lookupKey(db
,key
); 
3070 static robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3071     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
3072     if (!o
) addReply(c
,reply
); 
3076 static robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3077     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
3078     if (!o
) addReply(c
,reply
); 
3082 static int checkType(redisClient 
*c
, robj 
*o
, int type
) { 
3083     if (o
->type 
!= type
) { 
3084         addReply(c
,shared
.wrongtypeerr
); 
3090 static int deleteKey(redisDb 
*db
, robj 
*key
) { 
3093     /* We need to protect key from destruction: after the first dictDelete() 
3094      * it may happen that 'key' is no longer valid if we don't increment 
3095      * it's count. This may happen when we get the object reference directly 
3096      * from the hash table with dictRandomKey() or dict iterators */ 
3098     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
); 
3099     retval 
= dictDelete(db
->dict
,key
); 
3102     return retval 
== DICT_OK
; 
3105 /* Check if the nul-terminated string 's' can be represented by a long 
3106  * (that is, is a number that fits into long without any other space or 
3107  * character before or after the digits). 
3109  * If so, the function returns REDIS_OK and *longval is set to the value 
3110  * of the number. Otherwise REDIS_ERR is returned */ 
3111 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
3112     char buf
[32], *endptr
; 
3116     value 
= strtol(s
, &endptr
, 10); 
3117     if (endptr
[0] != '\0') return REDIS_ERR
; 
3118     slen 
= snprintf(buf
,32,"%ld",value
); 
3120     /* If the number converted back into a string is not identical 
3121      * then it's not possible to encode the string as integer */ 
3122     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
3123     if (longval
) *longval 
= value
; 
3127 /* Try to encode a string object in order to save space */ 
3128 static robj 
*tryObjectEncoding(robj 
*o
) { 
3132     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
3133         return o
; /* Already encoded */ 
3135     /* It's not safe to encode shared objects: shared objects can be shared 
3136      * everywhere in the "object space" of Redis. Encoded objects can only 
3137      * appear as "values" (and not, for instance, as keys) */ 
3138      if (o
->refcount 
> 1) return o
; 
3140     /* Currently we try to encode only strings */ 
3141     redisAssert(o
->type 
== REDIS_STRING
); 
3143     /* Check if we can represent this string as a long integer */ 
3144     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
; 
3146     /* Ok, this object can be encoded */ 
3147     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3149         incrRefCount(shared
.integers
[value
]); 
3150         return shared
.integers
[value
]; 
3152         o
->encoding 
= REDIS_ENCODING_INT
; 
3154         o
->ptr 
= (void*) value
; 
3159 /* Get a decoded version of an encoded object (returned as a new object). 
3160  * If the object is already raw-encoded just increment the ref count. */ 
3161 static robj 
*getDecodedObject(robj 
*o
) { 
3164     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3168     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
3171         snprintf(buf
,32,"%ld",(long)o
->ptr
); 
3172         dec 
= createStringObject(buf
,strlen(buf
)); 
3175         redisPanic("Unknown encoding type"); 
3179 /* Compare two string objects via strcmp() or alike. 
3180  * Note that the objects may be integer-encoded. In such a case we 
3181  * use snprintf() to get a string representation of the numbers on the stack 
3182  * and compare the strings, it's much faster than calling getDecodedObject(). 
3184  * Important note: if objects are not integer encoded, but binary-safe strings, 
3185  * sdscmp() from sds.c will apply memcmp() so this function ca be considered 
3187 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
3188     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
3189     char bufa
[128], bufb
[128], *astr
, *bstr
; 
3192     if (a 
== b
) return 0; 
3193     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
3194         snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
); 
3200     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
3201         snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
); 
3207     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
3210 static size_t stringObjectLen(robj 
*o
) { 
3211     redisAssert(o
->type 
== REDIS_STRING
); 
3212     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3213         return sdslen(o
->ptr
); 
3217         return snprintf(buf
,32,"%ld",(long)o
->ptr
); 
3221 static int getDoubleFromObject(robj 
*o
, double *target
) { 
3228         redisAssert(o
->type 
== REDIS_STRING
); 
3229         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3230             value 
= strtod(o
->ptr
, &eptr
); 
3231             if (eptr
[0] != '\0') return REDIS_ERR
; 
3232         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3233             value 
= (long)o
->ptr
; 
3235             redisPanic("Unknown string encoding"); 
3243 static int getDoubleFromObjectOrReply(redisClient 
*c
, robj 
*o
, double *target
, const char *msg
) { 
3245     if (getDoubleFromObject(o
, &value
) != REDIS_OK
) { 
3247             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3249             addReplySds(c
, sdsnew("-ERR value is not a double\r\n")); 
3258 static int getLongLongFromObject(robj 
*o
, long long *target
) { 
3265         redisAssert(o
->type 
== REDIS_STRING
); 
3266         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3267             value 
= strtoll(o
->ptr
, &eptr
, 10); 
3268             if (eptr
[0] != '\0') return REDIS_ERR
; 
3269         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3270             value 
= (long)o
->ptr
; 
3272             redisPanic("Unknown string encoding"); 
3280 static int getLongLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long long *target
, const char *msg
) { 
3282     if (getLongLongFromObject(o
, &value
) != REDIS_OK
) { 
3284             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3286             addReplySds(c
, sdsnew("-ERR value is not an integer\r\n")); 
3295 static int getLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long *target
, const char *msg
) { 
3298     if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
; 
3299     if (value 
< LONG_MIN 
|| value 
> LONG_MAX
) { 
3301             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3303             addReplySds(c
, sdsnew("-ERR value is out of range\r\n")); 
3312 /*============================ RDB saving/loading =========================== */ 
/* Write the single byte 'type' to 'fp'. Returns 0 on success, -1 if
 * nothing was written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 32 bit signed integer, in the byte order
 * of the host. Returns 0 on success, -1 if nothing was written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;
    size_t written = fwrite(&t32,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3325 /* check rdbLoadLen() comments for more info */ 
3326 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
3327     unsigned char buf
[2]; 
3330         /* Save a 6 bit len */ 
3331         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
3332         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3333     } else if (len 
< (1<<14)) { 
3334         /* Save a 14 bit len */ 
3335         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
3337         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
3339         /* Save a 32 bit len */ 
3340         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
3341         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3343         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
3348 /* String objects in the form "2391" "-100" without any space and with a 
3349  * range of values that can fit in an 8, 16 or 32 bit signed value can be 
3350  * encoded as integers to save space */ 
3351 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) { 
3353     char *endptr
, buf
[32]; 
3355     /* Check if it's possible to encode this value as a number */ 
3356     value 
= strtoll(s
, &endptr
, 10); 
3357     if (endptr
[0] != '\0') return 0; 
3358     snprintf(buf
,32,"%lld",value
); 
3360     /* If the number converted back into a string is not identical 
3361      * then it's not possible to encode the string as integer */ 
3362     if (strlen(buf
) != len 
|| memcmp(buf
,s
,len
)) return 0; 
3364     /* Finally check if it fits in our ranges */ 
3365     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
3366         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
3367         enc
[1] = value
&0xFF; 
3369     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
3370         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
3371         enc
[1] = value
&0xFF; 
3372         enc
[2] = (value
>>8)&0xFF; 
3374     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
3375         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
3376         enc
[1] = value
&0xFF; 
3377         enc
[2] = (value
>>8)&0xFF; 
3378         enc
[3] = (value
>>16)&0xFF; 
3379         enc
[4] = (value
>>24)&0xFF; 
3386 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) { 
3387     size_t comprlen
, outlen
; 
3391     /* We require at least four bytes compression for this to be worth it */ 
3392     if (len 
<= 4) return 0; 
3394     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
3395     comprlen 
= lzf_compress(s
, len
, out
, outlen
); 
3396     if (comprlen 
== 0) { 
3400     /* Data compressed! Let's save it on disk */ 
3401     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
3402     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
3403     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
3404     if (rdbSaveLen(fp
,len
) == -1) goto writeerr
; 
3405     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3414 /* Save a string objet as [len][data] on disk. If the object is a string 
3415  * representation of an integer value we try to safe it in a special form */ 
3416 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) { 
3419     /* Try integer encoding */ 
3421         unsigned char buf
[5]; 
3422         if ((enclen 
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) { 
3423             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3428     /* Try LZF compression - under 20 bytes it's unable to compress even 
3429      * aaaaaaaaaaaaaaaaaa so skip it */ 
3430     if (server
.rdbcompression 
&& len 
> 20) { 
3433         retval 
= rdbSaveLzfStringObject(fp
,s
,len
); 
3434         if (retval 
== -1) return -1; 
3435         if (retval 
> 0) return 0; 
3436         /* retval == 0 means data can't be compressed, save the old way */ 
3439     /* Store verbatim */ 
3440     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3441     if (len 
&& fwrite(s
,len
,1,fp
) == 0) return -1; 
3445 /* Like rdbSaveStringObjectRaw() but handle encoded objects */ 
3446 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3449     /* Avoid incr/decr ref count business when possible. 
3450      * This plays well with copy-on-write given that we are probably 
3451      * in a child process (BGSAVE). Also this makes sure key objects 
3452      * of swapped objects are not incRefCount-ed (an assert does not allow 
3453      * this in order to avoid bugs) */ 
3454     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
3455         obj 
= getDecodedObject(obj
); 
3456         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
3459         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * special cases, that are saved as the single prefix byte alone:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 if nothing was written. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len = 1; /* special values take the prefix byte only */

    if (isnan(val)) {
        buf[0] = 253;
    } else if (isfinite(val)) {
        char *repr = (char*)buf+1;

        snprintf(repr,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen(repr);
        len = buf[0]+1;
    } else {
        buf[0] = (val < 0) ? 255 : 254;
    }
    return (fwrite(buf,len,1,fp) == 0) ? -1 : 0;
}
3491 /* Save a Redis object. */ 
3492 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3493     if (o
->type 
== REDIS_STRING
) { 
3494         /* Save a string value */ 
3495         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3496     } else if (o
->type 
== REDIS_LIST
) { 
3497         /* Save a list value */ 
3498         list 
*list 
= o
->ptr
; 
3502         if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3503         listRewind(list
,&li
); 
3504         while((ln 
= listNext(&li
))) { 
3505             robj 
*eleobj 
= listNodeValue(ln
); 
3507             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3509     } else if (o
->type 
== REDIS_SET
) { 
3510         /* Save a set value */ 
3512         dictIterator 
*di 
= dictGetIterator(set
); 
3515         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3516         while((de 
= dictNext(di
)) != NULL
) { 
3517             robj 
*eleobj 
= dictGetEntryKey(de
); 
3519             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3521         dictReleaseIterator(di
); 
3522     } else if (o
->type 
== REDIS_ZSET
) { 
3523         /* Save a set value */ 
3525         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3528         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3529         while((de 
= dictNext(di
)) != NULL
) { 
3530             robj 
*eleobj 
= dictGetEntryKey(de
); 
3531             double *score 
= dictGetEntryVal(de
); 
3533             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3534             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3536         dictReleaseIterator(di
); 
3537     } else if (o
->type 
== REDIS_HASH
) { 
3538         /* Save a hash value */ 
3539         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3540             unsigned char *p 
= zipmapRewind(o
->ptr
); 
3541             unsigned int count 
= zipmapLen(o
->ptr
); 
3542             unsigned char *key
, *val
; 
3543             unsigned int klen
, vlen
; 
3545             if (rdbSaveLen(fp
,count
) == -1) return -1; 
3546             while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
3547                 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1; 
3548                 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1; 
3551             dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
3554             if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1; 
3555             while((de 
= dictNext(di
)) != NULL
) { 
3556                 robj 
*key 
= dictGetEntryKey(de
); 
3557                 robj 
*val 
= dictGetEntryVal(de
); 
3559                 if (rdbSaveStringObject(fp
,key
) == -1) return -1; 
3560                 if (rdbSaveStringObject(fp
,val
) == -1) return -1; 
3562             dictReleaseIterator(di
); 
3565         redisPanic("Unknown object type"); 
3570 /* Return the length the object will have on disk if saved with 
3571  * the rdbSaveObject() function. Currently we use a trick to get 
3572  * this length with very little changes to the code. In the future 
3573  * we could switch to a faster solution. */ 
3574 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3575     if (fp 
== NULL
) fp 
= server
.devnull
; 
3577     assert(rdbSaveObject(fp
,o
) != 1); 
3581 /* Return the number of pages required to save this object in the swap file */ 
3582 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3583     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3585     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3588 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3589 static int rdbSave(char *filename
) { 
3590     dictIterator 
*di 
= NULL
; 
3595     time_t now 
= time(NULL
); 
3597     /* Wait for I/O therads to terminate, just in case this is a 
3598      * foreground-saving, to avoid seeking the swap file descriptor at the 
3600     if (server
.vm_enabled
) 
3601         waitEmptyIOJobsQueue(); 
3603     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3604     fp 
= fopen(tmpfile
,"w"); 
3606         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3609     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3610     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3611         redisDb 
*db 
= server
.db
+j
; 
3613         if (dictSize(d
) == 0) continue; 
3614         di 
= dictGetIterator(d
); 
3620         /* Write the SELECT DB opcode */ 
3621         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3622         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3624         /* Iterate this DB writing every entry */ 
3625         while((de 
= dictNext(di
)) != NULL
) { 
3626             robj 
*key 
= dictGetEntryKey(de
); 
3627             robj 
*o 
= dictGetEntryVal(de
); 
3628             time_t expiretime 
= getExpire(db
,key
); 
3630             /* Save the expire time */ 
3631             if (expiretime 
!= -1) { 
3632                 /* If this key is already expired skip it */ 
3633                 if (expiretime 
< now
) continue; 
3634                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3635                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3637             /* Save the key and associated value. This requires special 
3638              * handling if the value is swapped out. */ 
3639             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
3640                                       key
->storage 
== REDIS_VM_SWAPPING
) { 
3641                 /* Save type, key, value */ 
3642                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3643                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3644                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3646                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3648                 /* Get a preview of the object in memory */ 
3649                 po 
= vmPreviewObject(key
); 
3650                 /* Save type, key, value */ 
3651                 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
; 
3652                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3653                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3654                 /* Remove the loaded object from memory */ 
3658         dictReleaseIterator(di
); 
3661     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3663     /* Make sure data will not remain on the OS's output buffers */ 
3668     /* Use RENAME to make sure the DB file is changed atomically only 
3669      * if the generate DB file is ok. */ 
3670     if (rename(tmpfile
,filename
) == -1) { 
3671         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3675     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3677     server
.lastsave 
= time(NULL
); 
3683     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3684     if (di
) dictReleaseIterator(di
); 
3688 static int rdbSaveBackground(char *filename
) { 
3691     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3692     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3693     if ((childpid 
= fork()) == 0) { 
3695         if (server
.vm_enabled
) vmReopenSwapFile(); 
3697         if (rdbSave(filename
) == REDIS_OK
) { 
3704         if (childpid 
== -1) { 
3705             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3709         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3710         server
.bgsavechildpid 
= childpid
; 
3711         updateDictResizePolicy(); 
3714     return REDIS_OK
; /* unreached */ 
/* Remove the temporary dump file "temp-<childpid>.rdb", i.e. the name
 * rdbSave() uses when running in the process with that pid. */
static void rdbRemoveTempFile(pid_t childpid) {
    char tmpfile[256];

    snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
    unlink(tmpfile);
}
/* Read a one byte type/opcode from 'fp'.
 * Returns the byte as a non negative int, or -1 on read error / EOF. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    if (fread(&type,1,1,fp) == 0) return -1;
    return type;
}
/* Read a 4 byte signed time value from 'fp' (host byte order).
 * Returns -1 on read error / EOF. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;

    if (fread(&t32,4,1,fp) == 0) return -1;
    return (time_t) t32;
}
3736 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
3737  * of this file for a description of how this are stored on disk. 
3739  * isencoded is set to 1 if the readed length is not actually a length but 
3740  * an "encoding type", check the above comments for more info */ 
3741 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
3742     unsigned char buf
[2]; 
3746     if (isencoded
) *isencoded 
= 0; 
3747     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3748     type 
= (buf
[0]&0xC0)>>6; 
3749     if (type 
== REDIS_RDB_6BITLEN
) { 
3750         /* Read a 6 bit len */ 
3752     } else if (type 
== REDIS_RDB_ENCVAL
) { 
3753         /* Read a 6 bit len encoding type */ 
3754         if (isencoded
) *isencoded 
= 1; 
3756     } else if (type 
== REDIS_RDB_14BITLEN
) { 
3757         /* Read a 14 bit len */ 
3758         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3759         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
3761         /* Read a 32 bit len */ 
3762         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3767 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
) { 
3768     unsigned char enc
[4]; 
3771     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
3772         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
3773         val 
= (signed char)enc
[0]; 
3774     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
3776         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
3777         v 
= enc
[0]|(enc
[1]<<8); 
3779     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
3781         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
3782         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
3785         val 
= 0; /* anti-warning */ 
3786         redisPanic("Unknown RDB integer encoding type"); 
3788     return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
)); 
3791 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
3792     unsigned int len
, clen
; 
3793     unsigned char *c 
= NULL
; 
3796     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3797     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3798     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
3799     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
3800     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
3801     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
3803     return createObject(REDIS_STRING
,val
); 
3810 static robj 
*rdbLoadStringObject(FILE*fp
) { 
3815     len 
= rdbLoadLen(fp
,&isencoded
); 
3818         case REDIS_RDB_ENC_INT8
: 
3819         case REDIS_RDB_ENC_INT16
: 
3820         case REDIS_RDB_ENC_INT32
: 
3821             return rdbLoadIntegerObject(fp
,len
); 
3822         case REDIS_RDB_ENC_LZF
: 
3823             return rdbLoadLzfStringObject(fp
); 
3825             redisPanic("Unknown RDB encoding type"); 
3829     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
3830     val 
= sdsnewlen(NULL
,len
); 
3831     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
3835     return createObject(REDIS_STRING
,val
); 
3838 /* For information about double serialization check rdbSaveDoubleValue() */ 
3839 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
3843     if (fread(&len
,1,1,fp
) == 0) return -1; 
3845     case 255: *val 
= R_NegInf
; return 0; 
3846     case 254: *val 
= R_PosInf
; return 0; 
3847     case 253: *val 
= R_Nan
; return 0; 
3849         if (fread(buf
,len
,1,fp
) == 0) return -1; 
3851         sscanf(buf
, "%lg", val
); 
3856 /* Load a Redis object of the specified type from the specified file. 
3857  * On success a newly allocated object is returned, otherwise NULL. */ 
3858 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
3861     redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
)); 
3862     if (type 
== REDIS_STRING
) { 
3863         /* Read string value */ 
3864         if ((o 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3865         o 
= tryObjectEncoding(o
); 
3866     } else if (type 
== REDIS_LIST 
|| type 
== REDIS_SET
) { 
3867         /* Read list/set value */ 
3870         if ((listlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3871         o 
= (type 
== REDIS_LIST
) ? createListObject() : createSetObject(); 
3872         /* It's faster to expand the dict to the right size asap in order 
3873          * to avoid rehashing */ 
3874         if (type 
== REDIS_SET 
&& listlen 
> DICT_HT_INITIAL_SIZE
) 
3875             dictExpand(o
->ptr
,listlen
); 
3876         /* Load every single element of the list/set */ 
3880             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3881             ele 
= tryObjectEncoding(ele
); 
3882             if (type 
== REDIS_LIST
) { 
3883                 listAddNodeTail((list
*)o
->ptr
,ele
); 
3885                 dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
3888     } else if (type 
== REDIS_ZSET
) { 
3889         /* Read list/set value */ 
3893         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3894         o 
= createZsetObject(); 
3896         /* Load every single element of the list/set */ 
3899             double *score 
= zmalloc(sizeof(double)); 
3901             if ((ele 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3902             ele 
= tryObjectEncoding(ele
); 
3903             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
3904             dictAdd(zs
->dict
,ele
,score
); 
3905             zslInsert(zs
->zsl
,*score
,ele
); 
3906             incrRefCount(ele
); /* added to skiplist */ 
3908     } else if (type 
== REDIS_HASH
) { 
3911         if ((hashlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3912         o 
= createHashObject(); 
3913         /* Too many entries? Use an hash table. */ 
3914         if (hashlen 
> server
.hash_max_zipmap_entries
) 
3915             convertToRealHash(o
); 
3916         /* Load every key/value, then set it into the zipmap or hash 
3917          * table, as needed. */ 
3921             if ((key 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3922             if ((val 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
3923             /* If we are using a zipmap and there are too big values 
3924              * the object is converted to real hash table encoding. */ 
3925             if (o
->encoding 
!= REDIS_ENCODING_HT 
&& 
3926                (sdslen(key
->ptr
) > server
.hash_max_zipmap_value 
|| 
3927                 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)) 
3929                     convertToRealHash(o
); 
3932             if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3933                 unsigned char *zm 
= o
->ptr
; 
3935                 zm 
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
), 
3936                                   val
->ptr
,sdslen(val
->ptr
),NULL
); 
3941                 key 
= tryObjectEncoding(key
); 
3942                 val 
= tryObjectEncoding(val
); 
3943                 dictAdd((dict
*)o
->ptr
,key
,val
); 
3947         redisPanic("Unknown object type"); 
3952 static int rdbLoad(char *filename
) { 
3954     robj 
*keyobj 
= NULL
; 
3956     int type
, retval
, rdbver
; 
3957     dict 
*d 
= server
.db
[0].dict
; 
3958     redisDb 
*db 
= server
.db
+0; 
3960     time_t expiretime 
= -1, now 
= time(NULL
); 
3961     long long loadedkeys 
= 0; 
3963     fp 
= fopen(filename
,"r"); 
3964     if (!fp
) return REDIS_ERR
; 
3965     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
3967     if (memcmp(buf
,"REDIS",5) != 0) { 
3969         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
3972     rdbver 
= atoi(buf
+5); 
3975         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
3982         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3983         if (type 
== REDIS_EXPIRETIME
) { 
3984             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
3985             /* We read the time so we need to read the object type again */ 
3986             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
3988         if (type 
== REDIS_EOF
) break; 
3989         /* Handle SELECT DB opcode as a special case */ 
3990         if (type 
== REDIS_SELECTDB
) { 
3991             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
3993             if (dbid 
>= (unsigned)server
.dbnum
) { 
3994                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
3997             db 
= server
.db
+dbid
; 
4002         if ((keyobj 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
4004         if ((o 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
4005         /* Add the new object in the hash table */ 
4006         retval 
= dictAdd(d
,keyobj
,o
); 
4007         if (retval 
== DICT_ERR
) { 
4008             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
); 
4011         /* Set the expire time if needed */ 
4012         if (expiretime 
!= -1) { 
4013             setExpire(db
,keyobj
,expiretime
); 
4014             /* Delete this key if already expired */ 
4015             if (expiretime 
< now
) deleteKey(db
,keyobj
); 
4019         /* Handle swapping while loading big datasets when VM is on */ 
4021         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
4022             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
4023                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
4030 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
4031     if (keyobj
) decrRefCount(keyobj
); 
4032     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
4034     return REDIS_ERR
; /* Just to avoid warning */ 
4037 /*================================== Commands =============================== */ 
4039 static void authCommand(redisClient 
*c
) { 
4040     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
4041       c
->authenticated 
= 1; 
4042       addReply(c
,shared
.ok
); 
4044       c
->authenticated 
= 0; 
4045       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
4049 static void pingCommand(redisClient 
*c
) { 
4050     addReply(c
,shared
.pong
); 
4053 static void echoCommand(redisClient 
*c
) { 
4054     addReplyBulk(c
,c
->argv
[1]); 
4057 /*=================================== Strings =============================== */ 
4059 static void setGenericCommand(redisClient 
*c
, int nx
, robj 
*key
, robj 
*val
, robj 
*expire
) { 
4061     long seconds 
= 0; /* initialized to avoid an harmness warning */ 
4064         if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
) 
4067             addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n")); 
4072     if (nx
) deleteIfVolatile(c
->db
,key
); 
4073     retval 
= dictAdd(c
->db
->dict
,key
,val
); 
4074     if (retval 
== DICT_ERR
) { 
4076             /* If the key is about a swapped value, we want a new key object 
4077              * to overwrite the old. So we delete the old key in the database. 
4078              * This will also make sure that swap pages about the old object 
4079              * will be marked as free. */ 
4080             if (server
.vm_enabled 
&& deleteIfSwapped(c
->db
,key
)) 
4082             dictReplace(c
->db
->dict
,key
,val
); 
4085             addReply(c
,shared
.czero
); 
4093     removeExpire(c
->db
,key
); 
4094     if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
); 
4095     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4098 static void setCommand(redisClient 
*c
) { 
4099     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
); 
4102 static void setnxCommand(redisClient 
*c
) { 
4103     setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
); 
4106 static void setexCommand(redisClient 
*c
) { 
4107     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]); 
4110 static int getGenericCommand(redisClient 
*c
) { 
4113     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
) 
4116     if (o
->type 
!= REDIS_STRING
) { 
4117         addReply(c
,shared
.wrongtypeerr
); 
4125 static void getCommand(redisClient 
*c
) { 
4126     getGenericCommand(c
); 
4129 static void getsetCommand(redisClient 
*c
) { 
4130     if (getGenericCommand(c
) == REDIS_ERR
) return; 
4131     if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) { 
4132         dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
4134         incrRefCount(c
->argv
[1]); 
4136     incrRefCount(c
->argv
[2]); 
4138     removeExpire(c
->db
,c
->argv
[1]); 
4141 static void mgetCommand(redisClient 
*c
) { 
4144     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
4145     for (j 
= 1; j 
< c
->argc
; j
++) { 
4146         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
4148             addReply(c
,shared
.nullbulk
); 
4150             if (o
->type 
!= REDIS_STRING
) { 
4151                 addReply(c
,shared
.nullbulk
); 
4159 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
4160     int j
, busykeys 
= 0; 
4162     if ((c
->argc 
% 2) == 0) { 
4163         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
4166     /* Handle the NX flag. The MSETNX semantic is to return zero and don't 
4167      * set nothing at all if at least one already key exists. */ 
4169         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4170             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
4176         addReply(c
, shared
.czero
); 
4180     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4183         c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]); 
4184         retval 
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
4185         if (retval 
== DICT_ERR
) { 
4186             dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
4187             incrRefCount(c
->argv
[j
+1]); 
4189             incrRefCount(c
->argv
[j
]); 
4190             incrRefCount(c
->argv
[j
+1]); 
4192         removeExpire(c
->db
,c
->argv
[j
]); 
4194     server
.dirty 
+= (c
->argc
-1)/2; 
4195     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4198 static void msetCommand(redisClient 
*c
) { 
4199     msetGenericCommand(c
,0); 
4202 static void msetnxCommand(redisClient 
*c
) { 
4203     msetGenericCommand(c
,1); 
4206 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
4211     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4213     if (getLongLongFromObjectOrReply(c
, o
, &value
, NULL
) != REDIS_OK
) return; 
4216     o 
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
)); 
4217     o 
= tryObjectEncoding(o
); 
4218     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
4219     if (retval 
== DICT_ERR
) { 
4220         dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
4221         removeExpire(c
->db
,c
->argv
[1]); 
4223         incrRefCount(c
->argv
[1]); 
4226     addReply(c
,shared
.colon
); 
4228     addReply(c
,shared
.crlf
); 
4231 static void incrCommand(redisClient 
*c
) { 
4232     incrDecrCommand(c
,1); 
4235 static void decrCommand(redisClient 
*c
) { 
4236     incrDecrCommand(c
,-1); 
4239 static void incrbyCommand(redisClient 
*c
) { 
4242     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4243     incrDecrCommand(c
,incr
); 
4246 static void decrbyCommand(redisClient 
*c
) { 
4249     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4250     incrDecrCommand(c
,-incr
); 
4253 static void appendCommand(redisClient 
*c
) { 
4258     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4260         /* Create the key */ 
4261         retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
4262         incrRefCount(c
->argv
[1]); 
4263         incrRefCount(c
->argv
[2]); 
4264         totlen 
= stringObjectLen(c
->argv
[2]); 
4268         de 
= dictFind(c
->db
->dict
,c
->argv
[1]); 
4271         o 
= dictGetEntryVal(de
); 
4272         if (o
->type 
!= REDIS_STRING
) { 
4273             addReply(c
,shared
.wrongtypeerr
); 
4276         /* If the object is specially encoded or shared we have to make 
4278         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
4279             robj 
*decoded 
= getDecodedObject(o
); 
4281             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
4282             decrRefCount(decoded
); 
4283             dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
4286         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
4287             o
->ptr 
= sdscatlen(o
->ptr
, 
4288                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
4290             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
4291                 (unsigned long) c
->argv
[2]->ptr
); 
4293         totlen 
= sdslen(o
->ptr
); 
4296     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
4299 static void substrCommand(redisClient 
*c
) { 
4301     long start 
= atoi(c
->argv
[2]->ptr
); 
4302     long end 
= atoi(c
->argv
[3]->ptr
); 
4303     size_t rangelen
, strlen
; 
4306     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4307         checkType(c
,o
,REDIS_STRING
)) return; 
4309     o 
= getDecodedObject(o
); 
4310     strlen 
= sdslen(o
->ptr
); 
4312     /* convert negative indexes */ 
4313     if (start 
< 0) start 
= strlen
+start
; 
4314     if (end 
< 0) end 
= strlen
+end
; 
4315     if (start 
< 0) start 
= 0; 
4316     if (end 
< 0) end 
= 0; 
4318     /* indexes sanity checks */ 
4319     if (start 
> end 
|| (size_t)start 
>= strlen
) { 
4320         /* Out of range start or start > end result in null reply */ 
4321         addReply(c
,shared
.nullbulk
); 
4325     if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
4326     rangelen 
= (end
-start
)+1; 
4328     /* Return the result */ 
4329     addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
)); 
4330     range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
4331     addReplySds(c
,range
); 
4332     addReply(c
,shared
.crlf
); 
4336 /* ========================= Type agnostic commands ========================= */ 
4338 static void delCommand(redisClient 
*c
) { 
4341     for (j 
= 1; j 
< c
->argc
; j
++) { 
4342         if (deleteKey(c
->db
,c
->argv
[j
])) { 
4347     addReplyLong(c
,deleted
); 
4350 static void existsCommand(redisClient 
*c
) { 
4351     expireIfNeeded(c
->db
,c
->argv
[1]); 
4352     if (dictFind(c
->db
->dict
,c
->argv
[1])) { 
4353         addReply(c
, shared
.cone
); 
4355         addReply(c
, shared
.czero
); 
4359 static void selectCommand(redisClient 
*c
) { 
4360     int id 
= atoi(c
->argv
[1]->ptr
); 
4362     if (selectDb(c
,id
) == REDIS_ERR
) { 
4363         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
4365         addReply(c
,shared
.ok
); 
4369 static void randomkeyCommand(redisClient 
*c
) { 
4374         de 
= dictGetRandomKey(c
->db
->dict
); 
4375         if (!de 
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break; 
4379         addReply(c
,shared
.nullbulk
); 
4383     key 
= dictGetEntryKey(de
); 
4384     if (server
.vm_enabled
) { 
4385         key 
= dupStringObject(key
); 
4386         addReplyBulk(c
,key
); 
4389         addReplyBulk(c
,key
); 
4393 static void keysCommand(redisClient 
*c
) { 
4396     sds pattern 
= c
->argv
[1]->ptr
; 
4397     int plen 
= sdslen(pattern
); 
4398     unsigned long numkeys 
= 0; 
4399     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
4401     di 
= dictGetIterator(c
->db
->dict
); 
4403     decrRefCount(lenobj
); 
4404     while((de 
= dictNext(di
)) != NULL
) { 
4405         robj 
*keyobj 
= dictGetEntryKey(de
); 
4407         sds key 
= keyobj
->ptr
; 
4408         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
4409             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
4410             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
4411                 addReplyBulk(c
,keyobj
); 
4416     dictReleaseIterator(di
); 
4417     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
4420 static void dbsizeCommand(redisClient 
*c
) { 
4422         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
4425 static void lastsaveCommand(redisClient 
*c
) { 
4427         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
4430 static void typeCommand(redisClient 
*c
) { 
4434     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4439         case REDIS_STRING
: type 
= "+string"; break; 
4440         case REDIS_LIST
: type 
= "+list"; break; 
4441         case REDIS_SET
: type 
= "+set"; break; 
4442         case REDIS_ZSET
: type 
= "+zset"; break; 
4443         case REDIS_HASH
: type 
= "+hash"; break; 
4444         default: type 
= "+unknown"; break; 
4447     addReplySds(c
,sdsnew(type
)); 
4448     addReply(c
,shared
.crlf
); 
4451 static void saveCommand(redisClient 
*c
) { 
4452     if (server
.bgsavechildpid 
!= -1) { 
4453         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
4456     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4457         addReply(c
,shared
.ok
); 
4459         addReply(c
,shared
.err
); 
4463 static void bgsaveCommand(redisClient 
*c
) { 
4464     if (server
.bgsavechildpid 
!= -1) { 
4465         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4468     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4469         char *status 
= "+Background saving started\r\n"; 
4470         addReplySds(c
,sdsnew(status
)); 
4472         addReply(c
,shared
.err
); 
4476 static void shutdownCommand(redisClient 
*c
) { 
4477     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4478     /* Kill the saving child if there is a background saving in progress. 
4479        We want to avoid race conditions, for instance our saving child may 
4480        overwrite the synchronous saving did by SHUTDOWN. */ 
4481     if (server
.bgsavechildpid 
!= -1) { 
4482         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4483         kill(server
.bgsavechildpid
,SIGKILL
); 
4484         rdbRemoveTempFile(server
.bgsavechildpid
); 
4486     if (server
.appendonly
) { 
4487         /* Append only file: fsync() the AOF and exit */ 
4488         fsync(server
.appendfd
); 
4489         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4492         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4493         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4494             if (server
.daemonize
) 
4495                 unlink(server
.pidfile
); 
4496             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4497             redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4500             /* Ooops.. error saving! The best we can do is to continue 
4501              * operating. Note that if there was a background saving process, 
4502              * in the next cron() Redis will be notified that the background 
4503              * saving aborted, handling special stuff like slaves pending for 
4504              * synchronization... */ 
4505             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit"); 
4507                 sdsnew("-ERR can't quit, problems saving the DB\r\n")); 
/* Shared implementation of RENAME (nx == 0) and RENAMENX (nx != 0).
 * argv[1] is the source key and argv[2] the destination key.
 *
 * Replies with shared.sameobjecterr when both names compare equal, and lets
 * lookupKeyWriteOrReply() answer shared.nokeyerr when the source is missing.
 * The final reply is shared.cone for nx and shared.ok otherwise.
 *
 * NOTE(review): the embedded line numbering skips several lines in this
 * listing, so the exact branch structure around dictAdd()/dictReplace()
 * should be confirmed against the full source. */
4512 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4515     /* To use the same key as src and dst is probably an error */ 
4516     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4517         addReply(c
,shared
.sameobjecterr
); 
4521     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
4525     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4526     if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) { 
4529             addReply(c
,shared
.czero
); 
4532         dictReplace(c
->db
->dict
,c
->argv
[2],o
); 
4534         incrRefCount(c
->argv
[2]); 
4536     deleteKey(c
->db
,c
->argv
[1]); 
4538     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
/* RENAME entry point: delegates to renameGenericCommand() with nx = 0. */
4541 static void renameCommand(redisClient 
*c
) { 
4542     renameGenericCommand(c
,0); 
/* RENAMENX entry point: delegates to renameGenericCommand() with nx = 1. */
4545 static void renamenxCommand(redisClient 
*c
) { 
4546     renameGenericCommand(c
,1); 
/* MOVE: transfers key argv[1] from the client's current DB to the DB whose
 * index is atoi(argv[2]).
 *
 * Visible replies: shared.outofrangeerr when selectDb() rejects the target
 * index, shared.sameobjecterr for the same-DB case, shared.czero after the
 * source lookup or when dictAdd() on the target returns DICT_ERR, and
 * shared.cone once the key has been deleted from the source DB.
 *
 * NOTE(review): atoi() cannot report a non-numeric index; strtol() with
 * end-pointer checking would be stricter. Some lines of this function are
 * not visible in this listing. */
4549 static void moveCommand(redisClient 
*c
) { 
4554     /* Obtain source and target DB pointers */ 
4557     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4558         addReply(c
,shared
.outofrangeerr
); 
4562     selectDb(c
,srcid
); /* Back to the source DB */ 
4564     /* If the user is moving using as target the same 
4565      * DB as the source DB it is probably an error. */ 
4567         addReply(c
,shared
.sameobjecterr
); 
4571     /* Check if the element exists and get a reference */ 
4572     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4574         addReply(c
,shared
.czero
); 
4578     /* Try to add the element to the target DB */ 
4579     deleteIfVolatile(dst
,c
->argv
[1]); 
4580     if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) { 
4581         addReply(c
,shared
.czero
); 
4584     incrRefCount(c
->argv
[1]); 
4587     /* OK! key moved, free the entry in the source DB */ 
4588     deleteKey(src
,c
->argv
[1]); 
4590     addReply(c
,shared
.cone
); 
4593 /* =================================== Lists ================================ */ 
/* Shared implementation of LPUSH and RPUSH. 'where' is REDIS_HEAD or
 * REDIS_TAIL; argv[1] is the list key and argv[2] the value to push.
 *
 * In both the new-key and existing-key paths the value is first offered to
 * handleClientsWaitingListPush(); when that returns non-zero the reply is
 * shared.cone. Otherwise the value is linked at the head or tail and its
 * reference count incremented. A non-list value at the key yields
 * shared.wrongtypeerr. The final reply is ":<list length>".
 *
 * NOTE(review): the reply is formatted with "%d" from listLength(); confirm
 * the type listLength() yields matches that conversion. */
4594 static void pushGenericCommand(redisClient 
*c
, int where
) { 
4598     lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4600         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4601             addReply(c
,shared
.cone
); 
4604         lobj 
= createListObject(); 
4606         if (where 
== REDIS_HEAD
) { 
4607             listAddNodeHead(list
,c
->argv
[2]); 
4609             listAddNodeTail(list
,c
->argv
[2]); 
4611         dictAdd(c
->db
->dict
,c
->argv
[1],lobj
); 
4612         incrRefCount(c
->argv
[1]); 
4613         incrRefCount(c
->argv
[2]); 
4615         if (lobj
->type 
!= REDIS_LIST
) { 
4616             addReply(c
,shared
.wrongtypeerr
); 
4619         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4620             addReply(c
,shared
.cone
); 
4624         if (where 
== REDIS_HEAD
) { 
4625             listAddNodeHead(list
,c
->argv
[2]); 
4627             listAddNodeTail(list
,c
->argv
[2]); 
4629         incrRefCount(c
->argv
[2]); 
4632     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
))); 
/* LPUSH entry point: pushGenericCommand() with REDIS_HEAD. */
4635 static void lpushCommand(redisClient 
*c
) { 
4636     pushGenericCommand(c
,REDIS_HEAD
); 
/* RPUSH entry point: pushGenericCommand() with REDIS_TAIL. */
4639 static void rpushCommand(redisClient 
*c
) { 
4640     pushGenericCommand(c
,REDIS_TAIL
); 
/* LLEN: replies with the length of the list stored at argv[1].
 * lookupKeyReadOrReply() answers shared.czero when the key is missing, and
 * the function returns early when checkType() reports a non-list value. */
4643 static void llenCommand(redisClient 
*c
) { 
4647     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4648         checkType(c
,o
,REDIS_LIST
)) return; 
4651     addReplyUlong(c
,listLength(l
)); 
/* LINDEX: replies with the element at index atoi(argv[2]) of the list at
 * argv[1], obtained through listIndex(). A missing key is answered with
 * shared.nullbulk by lookupKeyReadOrReply(); shared.nullbulk is also sent
 * from the branch preceding the bulk reply (presumably when listIndex()
 * finds no node; the condition line is not visible here).
 *
 * NOTE(review): atoi() silently maps non-numeric input to 0. */
4654 static void lindexCommand(redisClient 
*c
) { 
4656     int index 
= atoi(c
->argv
[2]->ptr
); 
4660     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4661         checkType(c
,o
,REDIS_LIST
)) return; 
4664     ln 
= listIndex(list
, index
); 
4666         addReply(c
,shared
.nullbulk
); 
4668         robj 
*ele 
= listNodeValue(ln
); 
4669         addReplyBulk(c
,ele
); 
/* LSET: stores argv[3] in the node at index atoi(argv[2]) of the list at
 * argv[1]. A missing key is answered with shared.nokeyerr; the branch
 * preceding the update replies shared.outofrangeerr (presumably when
 * listIndex() finds no node; the condition line is not visible here).
 * On success the node value is replaced, argv[3] gains a reference and the
 * reply is shared.ok.
 *
 * NOTE(review): what happens to the previous element 'ele' is on a line not
 * visible in this listing; confirm it is released. */
4673 static void lsetCommand(redisClient 
*c
) { 
4675     int index 
= atoi(c
->argv
[2]->ptr
); 
4679     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL 
|| 
4680         checkType(c
,o
,REDIS_LIST
)) return; 
4683     ln 
= listIndex(list
, index
); 
4685         addReply(c
,shared
.outofrangeerr
); 
4687         robj 
*ele 
= listNodeValue(ln
); 
4690         listNodeValue(ln
) = c
->argv
[3]; 
4691         incrRefCount(c
->argv
[3]); 
4692         addReply(c
,shared
.ok
); 
/* Shared implementation of LPOP and RPOP. With where == REDIS_HEAD the node
 * comes from listFirst(), otherwise from listLast(). The element is sent as
 * a bulk reply, its node is removed with listDelNode(), and the key itself
 * is deleted once the list length reaches 0. A missing key is answered with
 * shared.nullbulk by lookupKeyWriteOrReply(). */
4697 static void popGenericCommand(redisClient 
*c
, int where
) { 
4702     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4703         checkType(c
,o
,REDIS_LIST
)) return; 
4706     if (where 
== REDIS_HEAD
) 
4707         ln 
= listFirst(list
); 
4709         ln 
= listLast(list
); 
4712         addReply(c
,shared
.nullbulk
); 
4714         robj 
*ele 
= listNodeValue(ln
); 
4715         addReplyBulk(c
,ele
); 
4716         listDelNode(list
,ln
); 
4717         if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
/* LPOP entry point: popGenericCommand() with REDIS_HEAD. */
4722 static void lpopCommand(redisClient 
*c
) { 
4723     popGenericCommand(c
,REDIS_HEAD
); 
/* RPOP entry point: popGenericCommand() with REDIS_TAIL. */
4726 static void rpopCommand(redisClient 
*c
) { 
4727     popGenericCommand(c
,REDIS_TAIL
); 
/* LRANGE: replies with the elements of the list at argv[1] between indexes
 * atoi(argv[2]) and atoi(argv[3]), both inclusive.
 *
 * Negative indexes are counted from the end of the list (llen is added) and
 * then clamped to 0; 'end' is clamped to llen-1. When start > end or
 * start >= llen the reply is shared.emptymultibulk, which is also what a
 * missing key yields. Otherwise a "*<rangelen>" header is sent followed by
 * one bulk reply per element, starting from listIndex(list, start).
 *
 * NOTE(review): atoi() silently maps non-numeric input to 0. */
4730 static void lrangeCommand(redisClient 
*c
) { 
4732     int start 
= atoi(c
->argv
[2]->ptr
); 
4733     int end 
= atoi(c
->argv
[3]->ptr
); 
4740     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
4741          || checkType(c
,o
,REDIS_LIST
)) return; 
4743     llen 
= listLength(list
); 
4745     /* convert negative indexes */ 
4746     if (start 
< 0) start 
= llen
+start
; 
4747     if (end 
< 0) end 
= llen
+end
; 
4748     if (start 
< 0) start 
= 0; 
4749     if (end 
< 0) end 
= 0; 
4751     /* indexes sanity checks */ 
4752     if (start 
> end 
|| start 
>= llen
) { 
4753         /* Out of range start or start > end result in empty list */ 
4754         addReply(c
,shared
.emptymultibulk
); 
4757     if (end 
>= llen
) end 
= llen
-1; 
4758     rangelen 
= (end
-start
)+1; 
4760     /* Return the result in form of a multi-bulk reply */ 
4761     ln 
= listIndex(list
, start
); 
4762     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
4763     for (j 
= 0; j 
< rangelen
; j
++) { 
4764         ele 
= listNodeValue(ln
); 
4765         addReplyBulk(c
,ele
); 
/* LTRIM: trims the list at argv[1] using the indexes atoi(argv[2]) and
 * atoi(argv[3]).
 *
 * Index normalization mirrors LRANGE: negative values are offset by llen and
 * clamped to 0, and 'end' is clamped to llen-1. 'ltrim' nodes are then
 * removed from the head and 'rtrim' nodes from the tail (the assignments to
 * both counters are on lines not visible in this listing). The key is
 * deleted when the list becomes empty. The reply is shared.ok, which is
 * also what a missing key yields via lookupKeyWriteOrReply(). */
4770 static void ltrimCommand(redisClient 
*c
) { 
4772     int start 
= atoi(c
->argv
[2]->ptr
); 
4773     int end 
= atoi(c
->argv
[3]->ptr
); 
4775     int j
, ltrim
, rtrim
; 
4779     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL 
|| 
4780         checkType(c
,o
,REDIS_LIST
)) return; 
4782     llen 
= listLength(list
); 
4784     /* convert negative indexes */ 
4785     if (start 
< 0) start 
= llen
+start
; 
4786     if (end 
< 0) end 
= llen
+end
; 
4787     if (start 
< 0) start 
= 0; 
4788     if (end 
< 0) end 
= 0; 
4790     /* indexes sanity checks */ 
4791     if (start 
> end 
|| start 
>= llen
) { 
4792         /* Out of range start or start > end result in empty list */ 
4796         if (end 
>= llen
) end 
= llen
-1; 
4801     /* Remove list elements to perform the trim */ 
4802     for (j 
= 0; j 
< ltrim
; j
++) { 
4803         ln 
= listFirst(list
); 
4804         listDelNode(list
,ln
); 
4806     for (j 
= 0; j 
< rtrim
; j
++) { 
4807         ln 
= listLast(list
); 
4808         listDelNode(list
,ln
); 
4810     if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4812     addReply(c
,shared
.ok
); 
/* LREM: removes elements equal to argv[3] (per compareStringObjects()) from
 * the list at argv[1]; the count is atoi(argv[2]).
 *
 * The scan starts at list->tail when 'fromtail' is set and at list->head
 * otherwise, following prev/next links accordingly; the count is negated on
 * the tail path. The 'next' pointer is captured before a node is deleted.
 * The scan stops early when toremove is non-zero and 'removed' reaches it,
 * so a count of 0 does not limit removals. The key is deleted when the list
 * becomes empty and the reply is ":<removed>". */
4815 static void lremCommand(redisClient 
*c
) { 
4818     listNode 
*ln
, *next
; 
4819     int toremove 
= atoi(c
->argv
[2]->ptr
); 
4823     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4824         checkType(c
,o
,REDIS_LIST
)) return; 
4828         toremove 
= -toremove
; 
4831     ln 
= fromtail 
? list
->tail 
: list
->head
; 
4833         robj 
*ele 
= listNodeValue(ln
); 
4835         next 
= fromtail 
? ln
->prev 
: ln
->next
; 
4836         if (compareStringObjects(ele
,c
->argv
[3]) == 0) { 
4837             listDelNode(list
,ln
); 
4840             if (toremove 
&& removed 
== toremove
) break; 
4844     if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4845     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
/* RPOPLPUSH implementation. argv[1] is the source list and argv[2] the
 * destination list. The tail element of the source is offered to
 * handleClientsWaitingListPush(); when that returns 0 it is pushed at the
 * head of the destination (created on demand). The element is also sent to
 * the caller as a bulk reply, then removed from the source, whose key is
 * deleted when the list becomes empty. A destination of the wrong type
 * yields shared.wrongtypeerr. */
4848 /* This is the semantic of this command: 
4849  *  RPOPLPUSH srclist dstlist: 
4850  *   IF LLEN(srclist) > 0 
4851  *     element = RPOP srclist 
4852  *     LPUSH dstlist element 
4859  * The idea is to be able to get an element from a list in a reliable way 
4860  * since the element is not just returned but pushed against another list 
4861  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
4863 static void rpoplpushcommand(redisClient 
*c
) { 
4868     if ((sobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4869         checkType(c
,sobj
,REDIS_LIST
)) return; 
4870     srclist 
= sobj
->ptr
; 
4871     ln 
= listLast(srclist
); 
4874         addReply(c
,shared
.nullbulk
); 
4876         robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4877         robj 
*ele 
= listNodeValue(ln
); 
4880         if (dobj 
&& dobj
->type 
!= REDIS_LIST
) { 
4881             addReply(c
,shared
.wrongtypeerr
); 
4885         /* Add the element to the target list (unless it's directly 
4886          * passed to some BLPOP-ing client */ 
4887         if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) { 
4889                 /* Create the list if the key does not exist */ 
4890                 dobj 
= createListObject(); 
4891                 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
); 
4892                 incrRefCount(c
->argv
[2]); 
4894             dstlist 
= dobj
->ptr
; 
4895             listAddNodeHead(dstlist
,ele
); 
4899         /* Send the element to the client as reply as well */ 
4900         addReplyBulk(c
,ele
); 
4902         /* Finally remove the element from the source list */ 
4903         listDelNode(srclist
,ln
); 
4904         if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4909 /* ==================================== Sets ================================ */ 
/* SADD: adds member argv[2] to the set at argv[1], creating the set object
 * when the lookup branch requires it. A key holding a non-set value yields
 * shared.wrongtypeerr. The reply is shared.cone when dictAdd() on the set
 * returns DICT_OK (the member then gains a reference) and shared.czero
 * otherwise. */
4911 static void saddCommand(redisClient 
*c
) { 
4914     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4916         set 
= createSetObject(); 
4917         dictAdd(c
->db
->dict
,c
->argv
[1],set
); 
4918         incrRefCount(c
->argv
[1]); 
4920         if (set
->type 
!= REDIS_SET
) { 
4921             addReply(c
,shared
.wrongtypeerr
); 
4925     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
4926         incrRefCount(c
->argv
[2]); 
4928         addReply(c
,shared
.cone
); 
4930         addReply(c
,shared
.czero
); 
/* SREM: removes member argv[2] from the set at argv[1]. When dictDelete()
 * returns DICT_OK the hash table is resized if htNeedsResize() says so, the
 * key is deleted if the set is now empty, and the reply is shared.cone;
 * otherwise the reply is shared.czero (also used for a missing key). */
4934 static void sremCommand(redisClient 
*c
) { 
4937     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4938         checkType(c
,set
,REDIS_SET
)) return; 
4940     if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
4942         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
4943         if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4944         addReply(c
,shared
.cone
); 
4946         addReply(c
,shared
.czero
); 
/* SMOVE: moves member argv[3] from the set at argv[1] to the set at argv[2].
 *
 * Replies: shared.czero when the source key is missing or the member is not
 * in the source set; shared.wrongtypeerr when either key holds a non-set
 * value; shared.cone after the move. The source key is deleted when it
 * becomes empty, unless source and destination are the same object. The
 * destination set is created on demand, and the member gains a reference
 * only when dictAdd() on the destination returns DICT_OK. */
4950 static void smoveCommand(redisClient 
*c
) { 
4951     robj 
*srcset
, *dstset
; 
4953     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4954     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4956     /* If the source key does not exist return 0, if it's of the wrong type 
4958     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
4959         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
4962     /* Error if the destination key is not a set as well */ 
4963     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
4964         addReply(c
,shared
.wrongtypeerr
); 
4967     /* Remove the element from the source set */ 
4968     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
4969         /* Key not found in the src set! return zero */ 
4970         addReply(c
,shared
.czero
); 
4973     if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset 
!= dstset
) 
4974         deleteKey(c
->db
,c
->argv
[1]); 
4976     /* Add the element to the destination set */ 
4978         dstset 
= createSetObject(); 
4979         dictAdd(c
->db
->dict
,c
->argv
[2],dstset
); 
4980         incrRefCount(c
->argv
[2]); 
4982     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
4983         incrRefCount(c
->argv
[3]); 
4984     addReply(c
,shared
.cone
); 
/* SISMEMBER: replies shared.cone when dictFind() locates argv[2] in the set
 * at argv[1], shared.czero otherwise (including a missing key). */
4987 static void sismemberCommand(redisClient 
*c
) { 
4990     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4991         checkType(c
,set
,REDIS_SET
)) return; 
4993     if (dictFind(set
->ptr
,c
->argv
[2])) 
4994         addReply(c
,shared
.cone
); 
4996         addReply(c
,shared
.czero
); 
/* SCARD: replies with dictSize() of the set at argv[1] via addReplyUlong();
 * a missing key is answered with shared.czero. */
4999 static void scardCommand(redisClient 
*c
) { 
5003     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5004         checkType(c
,o
,REDIS_SET
)) return; 
5007     addReplyUlong(c
,dictSize(s
)); 
/* SPOP: picks an entry with dictGetRandomKey() from the set at argv[1],
 * sends its key as a bulk reply and then deletes it from the set. The hash
 * table is resized when htNeedsResize() says so and the key is deleted when
 * the set becomes empty. The reply is sent before dictDelete() so the
 * element is still referenced while it is queued. A missing key yields
 * shared.nullbulk. */
5010 static void spopCommand(redisClient 
*c
) { 
5014     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5015         checkType(c
,set
,REDIS_SET
)) return; 
5017     de 
= dictGetRandomKey(set
->ptr
); 
5019         addReply(c
,shared
.nullbulk
); 
5021         robj 
*ele 
= dictGetEntryKey(de
); 
5023         addReplyBulk(c
,ele
); 
5024         dictDelete(set
->ptr
,ele
); 
5025         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5026         if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
/* SRANDMEMBER: like SPOP but read-only. Replies with the key of the entry
 * returned by dictGetRandomKey() without modifying the set; a missing key
 * yields shared.nullbulk. */
5031 static void srandmemberCommand(redisClient 
*c
) { 
5035     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5036         checkType(c
,set
,REDIS_SET
)) return; 
5038     de 
= dictGetRandomKey(set
->ptr
); 
5040         addReply(c
,shared
.nullbulk
); 
5042         robj 
*ele 
= dictGetEntryKey(de
); 
5044         addReplyBulk(c
,ele
); 
/* qsort() comparator ordering an array of dict pointers by ascending
 * dictSize(). s1 and s2 point at array slots, hence the dict ** view.
 *
 * NOTE(review): the result is the difference of two sizes converted to int.
 * If dictSize() yields an unsigned type wider than int, a large difference
 * can wrap and report the wrong sign; an explicit three-way comparison
 * (a > b) - (a < b) would avoid that. Confirm against dict.h. */
5048 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
5049     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
5051     return dictSize(*d1
)-dictSize(*d2
); 
/* Shared implementation of SINTER (dstkey == NULL) and SINTERSTORE
 * (dstkey != NULL, per the two callers in this file).
 *
 * setskeys/setsnum describe the input keys. Each key is looked up (with
 * lookupKeyWrite or lookupKeyRead depending on a condition not visible
 * here) and its dict stored in dv[]. A non-set value yields
 * shared.wrongtypeerr. The dict array is sorted by cardinality so the
 * smallest set drives the iteration; every element of dv[0] is kept only if
 * dictFind() locates it in all the other sets.
 *
 * Without a destination the elements are streamed as bulk replies and the
 * "*<cardinality>" header is patched into 'lenobj' afterwards. With a
 * destination the old key is deleted, the result stored when non-empty, and
 * the reply is the resulting size (shared.czero when empty).
 *
 * NOTE(review): dv is allocated with zmalloc() at entry; the early-return
 * paths are on lines partly missing from this listing, so confirm each one
 * releases it. */
5054 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
5055     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5058     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
5059     unsigned long j
, cardinality 
= 0; 
5061     for (j 
= 0; j 
< setsnum
; j
++) { 
5065                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5066                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5070                 if (deleteKey(c
->db
,dstkey
)) 
5072                 addReply(c
,shared
.czero
); 
5074                 addReply(c
,shared
.emptymultibulk
); 
5078         if (setobj
->type 
!= REDIS_SET
) { 
5080             addReply(c
,shared
.wrongtypeerr
); 
5083         dv
[j
] = setobj
->ptr
; 
5085     /* Sort sets from the smallest to largest, this will improve our 
5086      * algorithm's performance */ 
5087     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
5089     /* The first thing we should output is the total number of elements... 
5090      * since this is a multi-bulk write, but at this stage we don't know 
5091      * the intersection set size, so we use a trick, append an empty object 
5092      * to the output list and save the pointer to later modify it with the 
5095         lenobj 
= createObject(REDIS_STRING
,NULL
); 
5097         decrRefCount(lenobj
); 
5099         /* If we have a target key where to store the resulting set 
5100          * create this key with an empty set inside */ 
5101         dstset 
= createSetObject(); 
5104     /* Iterate all the elements of the first (smallest) set, and test 
5105      * the element against all the other sets, if at least one set does 
5106      * not include the element it is discarded */ 
5107     di 
= dictGetIterator(dv
[0]); 
5109     while((de 
= dictNext(di
)) != NULL
) { 
5112         for (j 
= 1; j 
< setsnum
; j
++) 
5113             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
5115             continue; /* at least one set does not contain the member */ 
5116         ele 
= dictGetEntryKey(de
); 
5118             addReplyBulk(c
,ele
); 
5121             dictAdd(dstset
->ptr
,ele
,NULL
); 
5125     dictReleaseIterator(di
); 
5128         /* Store the resulting set into the target, if the intersection 
5129          * is not an empty set. */ 
5130         deleteKey(c
->db
,dstkey
); 
5131         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5132             dictAdd(c
->db
->dict
,dstkey
,dstset
); 
5133             incrRefCount(dstkey
); 
5134             addReplyLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5136             decrRefCount(dstset
); 
5137             addReply(c
,shared
.czero
); 
5141         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
/* SINTER entry point: every argument after the command name is a set key and
 * there is no destination key. */
5146 static void sinterCommand(redisClient 
*c
) { 
5147     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
/* SINTERSTORE entry point: argv[1] is the destination key and the remaining
 * arguments are the set keys to intersect. */
5150 static void sinterstoreCommand(redisClient 
*c
) { 
5151     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
/* Operation selectors for sunionDiffGenericCommand(). The callers in this
 * file pass only REDIS_OP_UNION and REDIS_OP_DIFF. */
5154 #define REDIS_OP_UNION 0 
5155 #define REDIS_OP_DIFF 1 
5156 #define REDIS_OP_INTER 2 
/* Shared implementation of SUNION, SUNIONSTORE, SDIFF and SDIFFSTORE.
 *
 * setskeys/setsnum describe the input keys, dstkey is NULL for the
 * non-STORE variants, and op selects union or difference. Each input dict is
 * collected in dv[]; a NULL slot stands for a missing key and is treated as
 * an empty set. A non-set value yields shared.wrongtypeerr.
 *
 * A temporary set is always built: for a union, and for the first set of a
 * difference, elements are added; for later sets of a difference they are
 * deleted from it. A difference stops early once the running cardinality
 * drops to 0. Without a destination the result is streamed as a multi-bulk
 * reply and the temporary set released; with a destination the old key is
 * deleted, the result stored when non-empty, and the resulting size (or
 * shared.czero) is the reply. */
5158 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
5159     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5162     robj 
*dstset 
= NULL
; 
5163     int j
, cardinality 
= 0; 
5165     for (j 
= 0; j 
< setsnum
; j
++) { 
5169                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5170                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5175         if (setobj
->type 
!= REDIS_SET
) { 
5177             addReply(c
,shared
.wrongtypeerr
); 
5180         dv
[j
] = setobj
->ptr
; 
5183     /* We need a temp set object to store our union. If the dstkey 
5184      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
5185      * this set object will be the resulting object to set into the target key*/ 
5186     dstset 
= createSetObject(); 
5188     /* Iterate all the elements of all the sets, add every element a single 
5189      * time to the result set */ 
5190     for (j 
= 0; j 
< setsnum
; j
++) { 
5191         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
5192         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
5194         di 
= dictGetIterator(dv
[j
]); 
5196         while((de 
= dictNext(di
)) != NULL
) { 
5199             /* dictAdd will not add the same element multiple times */ 
5200             ele 
= dictGetEntryKey(de
); 
5201             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
5202                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
5206             } else if (op 
== REDIS_OP_DIFF
) { 
5207                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
5212         dictReleaseIterator(di
); 
5214         /* result set is empty? Exit asap. */ 
5215         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; 
5218     /* Output the content of the resulting set, if not in STORE mode */ 
5220         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
5221         di 
= dictGetIterator(dstset
->ptr
); 
5222         while((de 
= dictNext(di
)) != NULL
) { 
5225             ele 
= dictGetEntryKey(de
); 
5226             addReplyBulk(c
,ele
); 
5228         dictReleaseIterator(di
); 
5229         decrRefCount(dstset
); 
5231         /* If we have a target key where to store the resulting set 
5232          * create this key with the result set inside */ 
5233         deleteKey(c
->db
,dstkey
); 
5234         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5235             dictAdd(c
->db
->dict
,dstkey
,dstset
); 
5236             incrRefCount(dstkey
); 
5237             addReplyLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5239             decrRefCount(dstset
); 
5240             addReply(c
,shared
.czero
); 
/* SUNION entry point: all arguments are set keys, no destination key. */
5247 static void sunionCommand(redisClient 
*c
) { 
5248     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
/* SUNIONSTORE entry point: argv[1] is the destination key, the remaining
 * arguments are the set keys to unite. */
5251 static void sunionstoreCommand(redisClient 
*c
) { 
5252     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
/* SDIFF entry point: all arguments are set keys, no destination key. */
5255 static void sdiffCommand(redisClient 
*c
) { 
5256     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
/* SDIFFSTORE entry point: argv[1] is the destination key, the remaining
 * arguments are the set keys; the first one is the set being subtracted
 * from. */
5259 static void sdiffstoreCommand(redisClient 
*c
) { 
5260     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
5263 /* ==================================== ZSets =============================== */ 
5265 /* ZSETs are ordered sets using two data structures to hold the same elements 
5266  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
      * data structure. 
      * 
5269  * The elements are added to a hash table mapping Redis objects to scores. 
5270  * At the same time the elements are added to a skip list mapping scores 
5271  * to Redis objects (so objects are sorted by scores in this "view"). */ 
5273 /* This skiplist implementation is almost a C translation of the original 
5274  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
5275  * Alternative to Balanced Trees", modified in three ways: 
5276  * a) this implementation allows for repeated values. 
5277  * b) the comparison is not just by key (our 'score') but by satellite data. 
5278  * c) there is a back pointer, so it's a doubly linked list with the back 
5279  * pointers being only at "level 1". This allows to traverse the list 
5280  * from tail to head, useful for ZREVRANGE. */ 
/* Allocates a skiplist node with 'level' forward pointers. The span array
 * holds level-1 entries: level 0 has no span slot, which is why callers
 * index it as span[i-1] for i > 0. The stores of score and obj and the
 * condition guarding the span allocation are on lines not visible in this
 * listing. */
5282 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
5283     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
5285     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
5287         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
/* Allocates an empty skiplist. The header is a node of ZSKIPLIST_MAXLEVEL
 * levels with a NULL object; all of its forward pointers start as NULL, its
 * spans as 0 and its backward pointer as NULL. */
5293 static zskiplist 
*zslCreate(void) { 
5297     zsl 
= zmalloc(sizeof(*zsl
)); 
5300     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
5301     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
5302         zsl
->header
->forward
[j
] = NULL
; 
5304         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
5305         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
5306             zsl
->header
->span
[j
] = 0; 
5308     zsl
->header
->backward 
= NULL
; 
/* Releases a skiplist node: drops the reference held on node->obj and frees
 * the forward array. Freeing of the span array and of the node itself is on
 * lines not visible in this listing. */
5313 static void zslFreeNode(zskiplistNode 
*node
) { 
5314     decrRefCount(node
->obj
); 
5315     zfree(node
->forward
); 
/* Releases a whole skiplist. The first real node is captured before the
 * header's arrays are freed; the header is released by hand, presumably
 * because its object is NULL (see zslCreate). The remaining nodes are then
 * walked through their level-0 forward pointers, saving 'next' before each
 * node is released. */
5320 static void zslFree(zskiplist 
*zsl
) { 
5321     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
5323     zfree(zsl
->header
->forward
); 
5324     zfree(zsl
->header
->span
); 
5327         next 
= node
->forward
[0]; 
/* Picks a random level for a new node. The loop continues while the low 16
 * bits of random() fall below ZSKIPLIST_P * 0xFFFF, and the result is capped
 * at ZSKIPLIST_MAXLEVEL. The initial value and the increment of 'level' are
 * on lines not visible in this listing. */
5334 static int zslRandomLevel(void) { 
5336     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
5338     return (level
<ZSKIPLIST_MAXLEVEL
) ? level 
: ZSKIPLIST_MAXLEVEL
; 
/* Inserts (score, obj) into the skiplist.
 *
 * The search runs from the top level down. Nodes are ordered by score and,
 * for equal scores, by compareStringObjects() on the object. rank[i]
 * accumulates the number of elements crossed down to level i; level 0
 * counts 1 per step, higher levels use span[i-1]. update[i] holds the node
 * after which the new node is linked at level i.
 *
 * When the random level exceeds the current list level, the new levels
 * start at the header with a span equal to the list length. Spans of the
 * linked levels are split around the new node using rank[0] - rank[i], and
 * spans of untouched higher levels are incremented. The backward pointer is
 * NULL when the predecessor is the header. */
5341 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5342     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5343     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
5347     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5348         /* store rank that is crossed to reach the insert position */ 
5349         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
5351         while (x
->forward
[i
] && 
5352             (x
->forward
[i
]->score 
< score 
|| 
5353                 (x
->forward
[i
]->score 
== score 
&& 
5354                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
5355             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
5360     /* we assume the key is not already inside, since we allow duplicated 
5361      * scores, and the re-insertion of score and redis object should never 
5362      * happen since the caller of zslInsert() should test in the hash table 
5363      * if the element is already inside or not. */ 
5364     level 
= zslRandomLevel(); 
5365     if (level 
> zsl
->level
) { 
5366         for (i 
= zsl
->level
; i 
< level
; i
++) { 
5368             update
[i
] = zsl
->header
; 
5369             update
[i
]->span
[i
-1] = zsl
->length
; 
5373     x 
= zslCreateNode(level
,score
,obj
); 
5374     for (i 
= 0; i 
< level
; i
++) { 
5375         x
->forward
[i
] = update
[i
]->forward
[i
]; 
5376         update
[i
]->forward
[i
] = x
; 
5378         /* update span covered by update[i] as x is inserted here */ 
5380             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
5381             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
5385     /* increment span for untouched levels */ 
5386     for (i 
= level
; i 
< zsl
->level
; i
++) { 
5387         update
[i
]->span
[i
-1]++; 
5390     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
5392         x
->forward
[0]->backward 
= x
; 
/* Unlinks node x from the skiplist. 'update' must hold, for every level, the
 * node preceding x at that level, as computed by the callers' search loops.
 * At levels where x is linked the predecessor absorbs x's span minus one and
 * bypasses x; at levels above x the predecessor's span shrinks by one. The
 * backward pointer of the successor, or the list tail when x was last, is
 * repaired, and empty top levels are trimmed. The node itself is not freed
 * in the visible lines.
 *
 * NOTE(review): unlike its neighbours this function is not declared static;
 * confirm whether external linkage is intended. */
5398 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ 
5399 void zslDeleteNode(zskiplist 
*zsl
, zskiplistNode 
*x
, zskiplistNode 
**update
) { 
5401     for (i 
= 0; i 
< zsl
->level
; i
++) { 
5402         if (update
[i
]->forward
[i
] == x
) { 
5404                 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5406             update
[i
]->forward
[i
] = x
->forward
[i
]; 
5408             /* invariant: i > 0, because update[0]->forward[0] 
5409              * is always equal to x */ 
5410             update
[i
]->span
[i
-1] -= 1; 
5413     if (x
->forward
[0]) { 
5414         x
->forward
[0]->backward 
= x
->backward
; 
5416         zsl
->tail 
= x
->backward
; 
5418     while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
/* The search uses the same ordering as zslInsert() (score first, then
 * compareStringObjects() on the object). The candidate is unlinked through
 * zslDeleteNode() only when both its score and its object match. Both
 * visible return statements yield 0 for "not found"; the success return is
 * on a line not visible in this listing. */
5423 /* Delete an element with matching score/object from the skiplist. */ 
5424 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5425     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5429     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5430         while (x
->forward
[i
] && 
5431             (x
->forward
[i
]->score 
< score 
|| 
5432                 (x
->forward
[i
]->score 
== score 
&& 
5433                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
5437     /* We may have multiple elements with the same score, what we need 
5438      * is to find the element with both the right score and object. */ 
5440     if (x 
&& score 
== x
->score 
&& compareStringObjects(x
->obj
,obj
) == 0) { 
5441         zslDeleteNode(zsl
, x
, update
); 
5445         return 0; /* not found */ 
5447     return 0; /* not found */ 
5450 /* Delete all the elements with score between min and max from the skiplist. 
5451  * Min and max are inclusive, so every element with min <= score <= max is deleted. 
5452  * Note that this function takes the reference to the hash table view of the 
5453  * sorted set, in order to remove the elements from the hash table too. 
      * Returns the value of 'removed'. The successor of each node is saved 
      * before zslDeleteNode() unlinks it so the walk can continue. */ 
5454 static unsigned long zslDeleteRangeByScore(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
5455     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5456     unsigned long removed 
= 0; 
5460     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5461         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
5465     /* We may have multiple elements with the same score, what we need 
5466      * is to find the element with both the right score and object. */ 
5468     while (x 
&& x
->score 
<= max
) { 
5469         zskiplistNode 
*next 
= x
->forward
[0]; 
5470         zslDeleteNode(zsl
, x
, update
); 
5471         dictDelete(dict
,x
->obj
); 
5476     return removed
; /* not found */ 
/* The search descends the levels while the rank reached so far, plus the
 * span of the next hop (1 at level 0), stays below 'start'. Nodes are then
 * deleted one at a time while 'traversed' does not exceed 'end'; each node
 * is also removed from the hash table 'dict'. The successor is saved before
 * zslDeleteNode() unlinks the current node. */
5479 /* Delete all the elements with rank between start and end from the skiplist. 
5480  * Start and end are inclusive. Note that start and end need to be 1-based */ 
5481 static unsigned long zslDeleteRangeByRank(zskiplist 
*zsl
, unsigned int start
, unsigned int end
, dict 
*dict
) { 
5482     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5483     unsigned long traversed 
= 0, removed 
= 0; 
5487     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5488         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) < start
) { 
5489             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5497     while (x 
&& traversed 
<= end
) { 
5498         zskiplistNode 
*next 
= x
->forward
[0]; 
5499         zslDeleteNode(zsl
, x
, update
); 
5500         dictDelete(dict
,x
->obj
); 
/* The search descends the levels past every node whose score is lower than
 * 'score'; the level-0 successor of the node where it stops is returned. */
5509 /* Find the first node having a score equal or greater than the specified one. 
5510  * Returns NULL if there is no match. */ 
5511 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
5516     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5517         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
5520     /* We may have multiple elements with the same score, what we need 
5521      * is to find the element with both the right score and object. */ 
5522     return x
->forward
[0]; 
/* Rank lookup by (score, object). Unlike zslInsert()/zslDelete(), the object
 * comparison here uses <= 0, so the walk advances onto the matching node
 * itself and 'rank' includes it. After each level the current node's object
 * is compared with 'o' to detect the match. */
5525 /* Find the rank for an element by both score and key. 
5526  * Returns 0 when the element cannot be found, rank otherwise. 
5527  * Note that the rank is 1-based due to the span of zsl->header to the 
5529 static unsigned long zslGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
5531     unsigned long rank 
= 0; 
5535     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5536         while (x
->forward
[i
] && 
5537             (x
->forward
[i
]->score 
< score 
|| 
5538                 (x
->forward
[i
]->score 
== score 
&& 
5539                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
5540             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5544         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
5545         if (x
->obj 
&& compareStringObjects(x
->obj
,o
) == 0) { 
/* The walk descends the levels, advancing while the accumulated span does
 * not exceed 'rank', and tests for traversed == rank after each level.
 *
 * NOTE(review): this function is not declared static, unlike most of its
 * neighbours; confirm whether external linkage is intended. */
5552 /* Finds an element by its rank. The rank argument needs to be 1-based. */ 
5553 zskiplistNode
* zslGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
5555     unsigned long traversed 
= 0; 
5559     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5560         while (x
->forward
[i
] && (traversed 
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
) 
5562             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5565         if (traversed 
== rank
) { 
/* Notes on zaddGenericCommand() below:
 *  - The sorted set is created on demand; a key of another type yields
 *    shared.wrongtypeerr.
 *  - The new score lives in a heap-allocated double that becomes the hash
 *    table value. For an increment the old score, if present, is read from
 *    the hash table and added to scoreval.
 *  - New element: dictAdd() succeeds, the element is inserted in the skip
 *    list, and it gains one reference per container.
 *  - Existing element: the skip list entry is re-inserted only when the
 *    score actually changed, then the hash table value is replaced.
 *  - Replies: the resulting score when incrementing (addReplyDouble),
 *    otherwise shared.cone for a new element and shared.czero for an
 *    update. Which branch sends which is partly on lines not visible in
 *    this listing.
 * NOTE(review): confirm that 'score' is released on the paths where it is
 * not handed over to the hash table. */
5572 /* The actual Z-commands implementations */ 
5574 /* This generic command implements both ZADD and ZINCRBY. 
5575  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
5576  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
5577 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
5582     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
5583     if (zsetobj 
== NULL
) { 
5584         zsetobj 
= createZsetObject(); 
5585         dictAdd(c
->db
->dict
,key
,zsetobj
); 
5588         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5589             addReply(c
,shared
.wrongtypeerr
); 
5595     /* Ok now since we implement both ZADD and ZINCRBY here the code 
5596      * needs to handle the two different conditions. It's all about setting 
5597      * '*score', that is, the new score to set, to the right value. */ 
5598     score 
= zmalloc(sizeof(double)); 
5602         /* Read the old score. If the element was not present starts from 0 */ 
5603         de 
= dictFind(zs
->dict
,ele
); 
5605             double *oldscore 
= dictGetEntryVal(de
); 
5606             *score 
= *oldscore 
+ scoreval
; 
5614     /* What follows is a simple remove and re-insert operation that is common 
5615      * to both ZADD and ZINCRBY... */ 
5616     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
5617         /* case 1: New element */ 
5618         incrRefCount(ele
); /* added to hash */ 
5619         zslInsert(zs
->zsl
,*score
,ele
); 
5620         incrRefCount(ele
); /* added to skiplist */ 
5623             addReplyDouble(c
,*score
); 
5625             addReply(c
,shared
.cone
); 
5630         /* case 2: Score update operation */ 
5631         de 
= dictFind(zs
->dict
,ele
); 
5632         redisAssert(de 
!= NULL
); 
5633         oldscore 
= dictGetEntryVal(de
); 
5634         if (*score 
!= *oldscore
) { 
5637             /* Remove and insert the element in the skip list with new score */ 
5638             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
5639             redisAssert(deleted 
!= 0); 
5640             zslInsert(zs
->zsl
,*score
,ele
); 
5642             /* Update the score in the hash table */ 
5643             dictReplace(zs
->dict
,ele
,score
); 
5649             addReplyDouble(c
,*score
); 
5651             addReply(c
,shared
.czero
); 
/* ZADD entry point: parses argv[2] as a double score (returning early if
 * getDoubleFromObjectOrReply does not return REDIS_OK) and forwards
 * argv[1] as the key and argv[3] as the element to zaddGenericCommand
 * with doincrement = 0. */
5655 static void zaddCommand(redisClient 
*c
) { 
5658     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
5659     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
/* ZINCRBY entry point: parses argv[2] as a double (returning early if
 * getDoubleFromObjectOrReply does not return REDIS_OK) and forwards
 * argv[1] as the key and argv[3] as the element to zaddGenericCommand
 * with doincrement = 1. */
5662 static void zincrbyCommand(redisClient 
*c
) { 
5665     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
5666     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
/* ZREM: removes the element argv[2] from the sorted set stored at argv[1].
 *
 * Returns early when the key lookup yields NULL (lookupKeyWriteOrReply is
 * given shared.czero as its reply) or when checkType rejects the object.
 * On removal the element is deleted from both the skiplist and the dict,
 * the dict is resized if htNeedsResize says so, the key itself is deleted
 * when the set becomes empty, and shared.cone is sent.
 * NOTE(review): the shared.czero reply after dictFind is presumably the
 * "element not found" path; its guarding condition is not visible here. */
5669 static void zremCommand(redisClient 
*c
) { 
5676     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5677         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5680     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5682         addReply(c
,shared
.czero
); 
5685     /* Delete from the skiplist */ 
5686     oldscore 
= dictGetEntryVal(de
); 
5687     deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
5688     redisAssert(deleted 
!= 0); 
5690     /* Delete from the hash table */ 
5691     dictDelete(zs
->dict
,c
->argv
[2]); 
5692     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5693     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5695     addReply(c
,shared
.cone
); 
/* ZREMRANGEBYSCORE: removes the elements of the sorted set at argv[1]
 * selected by the score bounds argv[2] (min) and argv[3] (max).
 *
 * Both bounds are parsed as doubles before the key is looked up. The
 * removal itself is delegated to zslDeleteRangeByScore, which also
 * receives the set's dict. Afterwards the dict is resized if needed, the
 * key is deleted when the set is empty, server.dirty is increased by the
 * number of removed elements and that number is sent as the reply. */
5698 static void zremrangebyscoreCommand(redisClient 
*c
) { 
5705     if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) || 
5706         (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return; 
5708     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5709         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5712     deleted 
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
); 
5713     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5714     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5715     server
.dirty 
+= deleted
; 
5716     addReplyLong(c
,deleted
); 
/* ZREMRANGEBYRANK: removes the elements of the sorted set at argv[1] whose
 * 0-based rank lies between argv[2] (start) and argv[3] (end).
 *
 * Negative indexes are taken relative to the set length and then clamped
 * to 0; 'end' is clamped to the last element. A range with start > end or
 * start >= length replies shared.czero. The removal is delegated to
 * zslDeleteRangeByRank using 1-based ranks; afterwards the dict is resized
 * if needed, the key is deleted when the set is empty, server.dirty is
 * increased by the number of removed elements and that number is sent. */
5719 static void zremrangebyrankCommand(redisClient 
*c
) { 
5727     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
5728         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
5730     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5731         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5733     llen 
= zs
->zsl
->length
; 
5735     /* convert negative indexes */ 
5736     if (start 
< 0) start 
= llen
+start
; 
5737     if (end 
< 0) end 
= llen
+end
; 
5738     if (start 
< 0) start 
= 0; 
5739     if (end 
< 0) end 
= 0; 
5741     /* indexes sanity checks */ 
5742     if (start 
> end 
|| start 
>= llen
) { 
5743         addReply(c
,shared
.czero
); 
5746     if (end 
>= llen
) end 
= llen
-1; 
5748     /* increment start and end because zsl*Rank functions 
5749      * use 1-based rank */ 
5750     deleted 
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
); 
5751     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5752     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5753     server
.dirty 
+= deleted
; 
5754     addReplyLong(c
, deleted
); 
/* qsort() comparator ordering zsetopsrc entries by the size of their dict,
 * smallest first. An entry whose dict pointer is NULL counts as size 0.
 *
 * NOTE(review): size1 and size2 are unsigned long but their difference is
 * returned as int, so the result relies on an implicit narrowing
 * conversion; for very large size gaps the sign may not reflect the
 * ordering -- confirm this is acceptable for the expected set sizes. */
5762 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) { 
5763     zsetopsrc 
*d1 
= (void*) s1
, *d2 
= (void*) s2
; 
5764     unsigned long size1
, size2
; 
5765     size1 
= d1
->dict 
? dictSize(d1
->dict
) : 0; 
5766     size2 
= d2
->dict 
? dictSize(d2
->dict
) : 0; 
5767     return size1 
- size2
; 
/* Aggregation modes accepted by zunionInterAggregate. */
5770 #define REDIS_AGGR_SUM 1 
5771 #define REDIS_AGGR_MIN 2 
5772 #define REDIS_AGGR_MAX 3 
/* Folds 'val' into '*target' in place according to 'aggregate':
 * REDIS_AGGR_SUM adds val, REDIS_AGGR_MIN keeps the smaller of the two and
 * REDIS_AGGR_MAX keeps the larger. The redisPanic call is the remaining
 * branch (presumably reached for any other 'aggregate' value; the line
 * introducing that branch is not visible in this listing). */
5774 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) { 
5775     if (aggregate 
== REDIS_AGGR_SUM
) { 
5776         *target 
= *target 
+ val
; 
5777     } else if (aggregate 
== REDIS_AGGR_MIN
) { 
5778         *target 
= val 
< *target 
? val 
: *target
; 
5779     } else if (aggregate 
== REDIS_AGGR_MAX
) { 
5780         *target 
= val 
> *target 
? val 
: *target
; 
5783         redisPanic("Unknown ZUNION/INTER aggregate type"); 
/* Shared implementation of ZUNION and ZINTER.
 *
 * argv[2] gives the number of input keys ('zsetnum'), which are read from
 * argv[3] onwards; a syntax error is sent when 3+zsetnum exceeds argc.
 * Each input gets a weight that defaults to 1.0. The optional trailing
 * arguments "weights" (followed by zsetnum doubles) and "aggregate"
 * (followed by "sum", "min" or "max") are matched case-insensitively;
 * anything else is a syntax error.
 *
 * The inputs are sorted by cardinality, then a new sorted set is built:
 *  - REDIS_OP_INTER iterates the smallest input and looks each element up
 *    in every other input;
 *  - REDIS_OP_UNION iterates every input, skipping elements already in the
 *    destination, and aggregates with the inputs at larger indices.
 * Element scores are multiplied by the input's weight and combined with
 * zunionInterAggregate.
 *
 * Finally 'dstkey' is deleted; a non-empty result is stored under it and
 * its length is sent, otherwise the result object is released and
 * shared.czero is sent.
 *
 * NOTE(review): zsetnum is parsed with atoi(), which cannot report invalid
 * input; the guard in front of the "at least 1 input key" error is not
 * visible in this listing. */
5787 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
5789     int aggregate 
= REDIS_AGGR_SUM
; 
5796     /* expect zsetnum input keys to be given */ 
5797     zsetnum 
= atoi(c
->argv
[2]->ptr
); 
5799         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n")); 
5803     /* test if the expected number of keys would overflow */ 
5804     if (3+zsetnum 
> c
->argc
) { 
5805         addReply(c
,shared
.syntaxerr
); 
5809     /* read keys to be used for input */ 
5810     src 
= zmalloc(sizeof(zsetopsrc
) * zsetnum
); 
5811     for (i 
= 0, j 
= 3; i 
< zsetnum
; i
++, j
++) { 
5812         robj 
*zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
5816             if (zsetobj
->type 
!= REDIS_ZSET
) { 
5818                 addReply(c
,shared
.wrongtypeerr
); 
5821             src
[i
].dict 
= ((zset
*)zsetobj
->ptr
)->dict
; 
5824         /* default all weights to 1 */ 
5825         src
[i
].weight 
= 1.0; 
5828     /* parse optional extra arguments */ 
5830         int remaining 
= c
->argc 
- j
; 
5833             if (remaining 
>= (zsetnum 
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
5835                 for (i 
= 0; i 
< zsetnum
; i
++, j
++, remaining
--) { 
5836                     if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
) 
5839             } else if (remaining 
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) { 
5841                 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) { 
5842                     aggregate 
= REDIS_AGGR_SUM
; 
5843                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) { 
5844                     aggregate 
= REDIS_AGGR_MIN
; 
5845                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) { 
5846                     aggregate 
= REDIS_AGGR_MAX
; 
5849                     addReply(c
,shared
.syntaxerr
); 
5855                 addReply(c
,shared
.syntaxerr
); 
5861     /* sort sets from the smallest to largest, this will improve our 
5862      * algorithm's performance */ 
5863     qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
); 
5865     dstobj 
= createZsetObject(); 
5866     dstzset 
= dstobj
->ptr
; 
5868     if (op 
== REDIS_OP_INTER
) { 
5869         /* skip going over all entries if the smallest zset is NULL or empty */ 
5870         if (src
[0].dict 
&& dictSize(src
[0].dict
) > 0) { 
5871             /* precondition: as src[0].dict is non-empty and the zsets are ordered 
5872              * from small to large, all src[i > 0].dict are non-empty too */ 
5873             di 
= dictGetIterator(src
[0].dict
); 
5874             while((de 
= dictNext(di
)) != NULL
) { 
5875                 double *score 
= zmalloc(sizeof(double)), value
; 
5876                 *score 
= src
[0].weight 
* (*(double*)dictGetEntryVal(de
)); 
5878                 for (j 
= 1; j 
< zsetnum
; j
++) { 
5879                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
5881                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
5882                         zunionInterAggregate(score
, value
, aggregate
); 
5888                 /* skip entry when not present in every source dict */ 
5892                     robj 
*o 
= dictGetEntryKey(de
); 
5893                     dictAdd(dstzset
->dict
,o
,score
); 
5894                     incrRefCount(o
); /* added to dictionary */ 
5895                     zslInsert(dstzset
->zsl
,*score
,o
); 
5896                     incrRefCount(o
); /* added to skiplist */ 
5899             dictReleaseIterator(di
); 
5901     } else if (op 
== REDIS_OP_UNION
) { 
5902         for (i 
= 0; i 
< zsetnum
; i
++) { 
5903             if (!src
[i
].dict
) continue; 
5905             di 
= dictGetIterator(src
[i
].dict
); 
5906             while((de 
= dictNext(di
)) != NULL
) { 
5907                 /* skip key when already processed */ 
5908                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
5910                 double *score 
= zmalloc(sizeof(double)), value
; 
5911                 *score 
= src
[i
].weight 
* (*(double*)dictGetEntryVal(de
)); 
5913                 /* because the zsets are sorted by size, it's only possible 
5914                  * for sets at larger indices to hold this entry */ 
5915                 for (j 
= (i
+1); j 
< zsetnum
; j
++) { 
5916                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
5918                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
5919                         zunionInterAggregate(score
, value
, aggregate
); 
5923                 robj 
*o 
= dictGetEntryKey(de
); 
5924                 dictAdd(dstzset
->dict
,o
,score
); 
5925                 incrRefCount(o
); /* added to dictionary */ 
5926                 zslInsert(dstzset
->zsl
,*score
,o
); 
5927                 incrRefCount(o
); /* added to skiplist */ 
5929             dictReleaseIterator(di
); 
5932         /* unknown operator */ 
5933         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
    /* Any previous value at dstkey is removed before the result is stored. */
5936     deleteKey(c
->db
,dstkey
); 
5937     if (dstzset
->zsl
->length
) { 
5938         dictAdd(c
->db
->dict
,dstkey
,dstobj
); 
5939         incrRefCount(dstkey
); 
5940         addReplyLong(c
, dstzset
->zsl
->length
); 
5943         decrRefCount(dstobj
); 
5944         addReply(c
, shared
.czero
); 
/* ZUNION entry point: runs zunionInterGenericCommand with argv[1] as the
 * destination key and REDIS_OP_UNION as the operation. */
5949 static void zunionCommand(redisClient 
*c
) { 
5950     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
/* ZINTER entry point: runs zunionInterGenericCommand with argv[1] as the
 * destination key and REDIS_OP_INTER as the operation. */
5953 static void zinterCommand(redisClient 
*c
) { 
5954     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
/* Shared implementation of ZRANGE and ZREVRANGE ('reverse' non-zero walks
 * the skiplist backwards).
 *
 * argv[2] and argv[3] are the 0-based start and end indexes, parsed as
 * longs. A fifth argument is accepted only if it equals "withscores"
 * (case-insensitive); any other argument count >= 5 is a syntax error.
 * A missing key replies shared.emptymultibulk. Negative indexes are taken
 * relative to the set length and clamped to 0, 'end' is clamped to the
 * last element, and an out-of-range start or start > end yields an empty
 * multi-bulk reply.
 *
 * The reply is a multi-bulk of rangelen items, or rangelen*2 when
 * 'withscores' is set. */
5957 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
5969     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
5970         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
5972     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
5974     } else if (c
->argc 
>= 5) { 
5975         addReply(c
,shared
.syntaxerr
); 
5979     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
5980          || checkType(c
,o
,REDIS_ZSET
)) return; 
5985     /* convert negative indexes */ 
5986     if (start 
< 0) start 
= llen
+start
; 
5987     if (end 
< 0) end 
= llen
+end
; 
5988     if (start 
< 0) start 
= 0; 
5989     if (end 
< 0) end 
= 0; 
5991     /* indexes sanity checks */ 
5992     if (start 
> end 
|| start 
>= llen
) { 
5993         /* Out of range start or start > end result in empty list */ 
5994         addReply(c
,shared
.emptymultibulk
); 
5997     if (end 
>= llen
) end 
= llen
-1; 
5998     rangelen 
= (end
-start
)+1; 
6000     /* check if starting point is trivial, before searching 
6001      * the element in log(N) time */ 
6003         ln 
= start 
== 0 ? zsl
->tail 
: zslGetElementByRank(zsl
, llen
-start
); 
6006             zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1); 
6009     /* Return the result in form of a multi-bulk reply */ 
6010     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
6011         withscores 
? (rangelen
*2) : rangelen
)); 
6012     for (j 
= 0; j 
< rangelen
; j
++) { 
6014         addReplyBulk(c
,ele
); 
6016             addReplyDouble(c
,ln
->score
); 
6017         ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
/* ZRANGE entry point: zrangeGenericCommand with reverse = 0. */
6021 static void zrangeCommand(redisClient 
*c
) { 
6022     zrangeGenericCommand(c
,0); 
/* ZREVRANGE entry point: zrangeGenericCommand with reverse = 1. */
6025 static void zrevrangeCommand(redisClient 
*c
) { 
6026     zrangeGenericCommand(c
,1); 
/* Argument layout visible below: argv[1] is the key, argv[2] and argv[3]
 * are the min and max scores (parsed with strtod, an optional leading '('
 * is skipped), an optional "limit" offset count triple occupies
 * argv[4..6], and "withscores" may be the last argument. A negative
 * offset is reset to 0; limit defaults to -1.
 * NOTE(review): strtod is called with a NULL end pointer and offset/limit
 * are parsed with atoi, so malformed numbers are not rejected here. */
6029 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
6030  * If justcount is non-zero, just the count is returned. */ 
6031 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
6034     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
6035     int offset 
= 0, limit 
= -1; 
6039     /* Parse the min-max interval. If one of the values is prefixed 
6040      * by the "(" character, it's considered "open". For instance 
6041      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
6042      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
6043     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
6044         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
6047         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
6049     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
6050         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
6053         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
6056     /* Parse "WITHSCORES": note that if the command was called with 
6057      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
6058      * enter the following paths to parse WITHSCORES and LIMIT. */ 
6059     if (c
->argc 
== 5 || c
->argc 
== 8) { 
6060         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
6065     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
6069             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
6074     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
6075         addReply(c
,shared
.syntaxerr
); 
6077     } else if (c
->argc 
== (7 + withscores
)) { 
6078         offset 
= atoi(c
->argv
[5]->ptr
); 
6079         limit 
= atoi(c
->argv
[6]->ptr
); 
6080         if (offset 
< 0) offset 
= 0; 
6083     /* Ok, lookup the key and get the range */ 
6084     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6086         addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6088         if (o
->type 
!= REDIS_ZSET
) { 
6089             addReply(c
,shared
.wrongtypeerr
); 
6091             zset 
*zsetobj 
= o
->ptr
; 
6092             zskiplist 
*zsl 
= zsetobj
->zsl
; 
6094             robj 
*ele
, *lenobj 
= NULL
; 
6095             unsigned long rangelen 
= 0; 
6097             /* Get the first node with the score >= min, or with 
6098              * score > min if 'minex' is true. */ 
6099             ln 
= zslFirstWithScore(zsl
,min
); 
6100             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
6103                 /* No element matching the specified interval */ 
6104                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6108             /* We don't know in advance how many matching elements there 
6109              * are in the list, so we push this object that will represent 
6110              * the multi-bulk length in the output buffer, and will "fix" 
6113                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
6115                 decrRefCount(lenobj
); 
6118             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
6121                     ln 
= ln
->forward
[0]; 
6124                 if (limit 
== 0) break; 
6127                     addReplyBulk(c
,ele
); 
6129                         addReplyDouble(c
,ln
->score
); 
6131                 ln 
= ln
->forward
[0]; 
6133                 if (limit 
> 0) limit
--; 
6136                 addReplyLong(c
,(long)rangelen
); 
6138                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
6139                      withscores 
? (rangelen
*2) : rangelen
); 
/* ZRANGEBYSCORE entry point: genericZrangebyscoreCommand with
 * justcount = 0. */
6145 static void zrangebyscoreCommand(redisClient 
*c
) { 
6146     genericZrangebyscoreCommand(c
,0); 
/* ZCOUNT entry point: genericZrangebyscoreCommand with justcount = 1. */
6149 static void zcountCommand(redisClient 
*c
) { 
6150     genericZrangebyscoreCommand(c
,1); 
/* ZCARD: replies with the skiplist length of the sorted set at argv[1].
 * Returns early when the lookup yields NULL (lookupKeyReadOrReply is given
 * shared.czero as its reply) or when checkType rejects the object. */
6153 static void zcardCommand(redisClient 
*c
) { 
6157     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6158         checkType(c
,o
,REDIS_ZSET
)) return; 
6161     addReplyUlong(c
,zs
->zsl
->length
); 
/* ZSCORE: looks up element argv[2] in the dict of the sorted set at
 * argv[1] and replies with its score via addReplyDouble.
 * Returns early when the key lookup yields NULL (shared.nullbulk is the
 * reply handed to lookupKeyReadOrReply) or when checkType rejects the
 * object.
 * NOTE(review): the shared.nullbulk reply after dictFind is presumably the
 * "element not found" path; its condition is not visible in this listing. */
6164 static void zscoreCommand(redisClient 
*c
) { 
6169     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6170         checkType(c
,o
,REDIS_ZSET
)) return; 
6173     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6175         addReply(c
,shared
.nullbulk
); 
6177         double *score 
= dictGetEntryVal(de
); 
6179         addReplyDouble(c
,*score
); 
/* Shared implementation of ZRANK and ZREVRANK.
 *
 * The element argv[2] is looked up in the dict of the sorted set at
 * argv[1] to obtain its score, then zslGetRank is asked for its rank.
 * Two numeric replies are visible: 'zsl->length - rank' and 'rank-1'.
 * NOTE(review): presumably the first is the 'reverse' reply and the second
 * the forward one, and shared.nullbulk covers "element not found" / rank
 * not available; the selecting conditions are not visible here. */
6183 static void zrankGenericCommand(redisClient 
*c
, int reverse
) { 
6191     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6192         checkType(c
,o
,REDIS_ZSET
)) return; 
6196     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6198         addReply(c
,shared
.nullbulk
); 
6202     score 
= dictGetEntryVal(de
); 
6203     rank 
= zslGetRank(zsl
, *score
, c
->argv
[2]); 
6206             addReplyLong(c
, zsl
->length 
- rank
); 
6208             addReplyLong(c
, rank
-1); 
6211         addReply(c
,shared
.nullbulk
); 
/* ZRANK entry point: zrankGenericCommand with reverse = 0. */
6215 static void zrankCommand(redisClient 
*c
) { 
6216     zrankGenericCommand(c
, 0); 
/* ZREVRANK entry point: zrankGenericCommand with reverse = 1. */
6219 static void zrevrankCommand(redisClient 
*c
) { 
6220     zrankGenericCommand(c
, 1); 
6223 /* ========================= Hashes utility functions ======================= */ 
/* Selectors for the field or the value of a hash entry. They are distinct
 * bits: callers in this file test them with '&' and combine them with '|'. */
6224 #define REDIS_HASH_KEY 1 
6225 #define REDIS_HASH_VALUE 2 
/* Does nothing unless 'subject' is zipmap-encoded. Otherwise argv[start]
 * through argv[end] (inclusive) are inspected, and a raw-encoded argument
 * longer than server.hash_max_zipmap_value leads to convertToRealHash. */
6227 /* Check the length of a number of objects to see if we need to convert a 
6228  * zipmap to a real hash. Note that we only check string encoded objects 
6229  * as their string length can be queried in constant time. */ 
6230 static void hashTryConversion(robj 
*subject
, robj 
**argv
, int start
, int end
) { 
6232     if (subject
->encoding 
!= REDIS_ENCODING_ZIPMAP
) return; 
6234     for (i 
= start
; i 
<= end
; i
++) { 
6235         if (argv
[i
]->encoding 
== REDIS_ENCODING_RAW 
&& 
6236             sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) 
6238             convertToRealHash(subject
); 
/* Only acts when 'subject' uses REDIS_ENCODING_HT. Either of 'o1' and 'o2'
 * may be NULL, in which case it is skipped; otherwise the pointed-to
 * object pointer is replaced by the result of tryObjectEncoding. */
6244 /* Encode given objects in-place when the hash uses a dict. */ 
6245 static void hashTryObjectEncoding(robj 
*subject
, robj 
**o1
, robj 
**o2
) { 
6246     if (subject
->encoding 
== REDIS_ENCODING_HT
) { 
6247         if (o1
) *o1 
= tryObjectEncoding(*o1
); 
6248         if (o2
) *o2 
= tryObjectEncoding(*o2
); 
/* Zipmap encoding: 'key' is decoded with getDecodedObject, and a hit from
 * zipmapGet is copied into a new string object. Dict encoding: the stored
 * value object is returned after incrRefCount. */
6252 /* Get the value from a hash identified by key. Returns either a string 
6253  * object or NULL if the value cannot be found. The refcount of the object 
6254  * is always increased by 1 when the value was found. */ 
6255 static robj 
*hashGet(robj 
*o
, robj 
*key
) { 
6257     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6260         key 
= getDecodedObject(key
); 
6261         if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) { 
6262             value 
= createStringObject((char*)v
,vlen
); 
6266         dictEntry 
*de 
= dictFind(o
->ptr
,key
); 
6268             value 
= dictGetEntryVal(de
); 
6269             incrRefCount(value
); 
/* Zipmap encoding: 'key' is decoded with getDecodedObject and checked with
 * zipmapExists. Otherwise the key is looked up with dictFind. */
6275 /* Test if the key exists in the given hash. Returns 1 if the key 
6276  * exists and 0 when it doesn't. */ 
6277 static int hashExists(robj 
*o
, robj 
*key
) { 
6278     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6279         key 
= getDecodedObject(key
); 
6280         if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) { 
6286         if (dictFind(o
->ptr
,key
) != NULL
) { 
/* Zipmap encoding: key and value are decoded with getDecodedObject and
 * written with zipmapSet, which reports through 'update' whether an entry
 * was overwritten and may return a new zipmap pointer (stored back into
 * o->ptr). The hash is converted to a real hash table once the zipmap
 * holds more than server.hash_max_zipmap_entries entries.
 * Dict encoding: the pair is stored with dictReplace and the value's
 * refcount is incremented. */
6293 /* Add an element, discard the old if the key already exists. 
6294  * Return 0 on insert and 1 on update. */ 
6295 static int hashSet(robj 
*o
, robj 
*key
, robj 
*value
) { 
6297     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6298         key 
= getDecodedObject(key
); 
6299         value 
= getDecodedObject(value
); 
6300         o
->ptr 
= zipmapSet(o
->ptr
, 
6301             key
->ptr
,sdslen(key
->ptr
), 
6302             value
->ptr
,sdslen(value
->ptr
), &update
); 
6304         decrRefCount(value
); 
6306         /* Check if the zipmap needs to be upgraded to a real hash table */ 
6307         if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
) 
6308             convertToRealHash(o
); 
6310         if (dictReplace(o
->ptr
,key
,value
)) { 
6317         incrRefCount(value
); 
/* Zipmap encoding: the key is decoded and removed with zipmapDel, whose
 * returned pointer is stored back into o->ptr. Dict encoding: 'deleted' is
 * true when dictDelete returns DICT_OK. */
6322 /* Delete an element from a hash. 
6323  * Return 1 on deleted and 0 on not found. */ 
6324 static int hashDelete(robj 
*o
, robj 
*key
) { 
6326     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6327         key 
= getDecodedObject(key
); 
6328         o
->ptr 
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
); 
6331         deleted 
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
; 
6332         /* Always check if the dictionary needs a resize after a delete. */ 
6333         if (deleted 
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
); 
/* Uses zipmapLen for zipmap-encoded hashes and dictSize otherwise. */
6338 /* Return the number of elements in a hash. */ 
6339 static unsigned long hashLength(robj 
*o
) { 
6340     return (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) ? 
6341         zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
); 
6344 /* Structure to hold hash iteration abstraction. Note that iteration over 
6345  * hashes involves both fields and values. Because it is possible that 
6346  * not both are required, store pointers in the iterator to avoid 
6347  * unnecessary memory allocation for fields/values. */ 
    /* Current zipmap field (zk) and value (zv); hashNext passes their
     * addresses to zipmapNext. */
6351     unsigned char *zk
, *zv
; 
    /* Lengths of zk and zv, filled by the same zipmapNext call. */
6352     unsigned int zklen
, zvlen
; 
/* Allocates a hashIterator with zmalloc and records the encoding of
 * 'subject' in it. A zipmap-encoded hash gets its cursor from
 * zipmapRewind; a REDIS_ENCODING_HT hash gets a dict iterator. */
6358 static hashIterator 
*hashInitIterator(robj 
*subject
) { 
6359     hashIterator 
*hi 
= zmalloc(sizeof(hashIterator
)); 
6360     hi
->encoding 
= subject
->encoding
; 
6361     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6362         hi
->zi 
= zipmapRewind(subject
->ptr
); 
6363     } else if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6364         hi
->di 
= dictGetIterator(subject
->ptr
); 
/* Releases the dict iterator held by 'hi' when the iterated hash uses
 * REDIS_ENCODING_HT.
 * NOTE(review): freeing 'hi' itself is not visible in this listing. */
6371 static void hashReleaseIterator(hashIterator 
*hi
) { 
6372     if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6373         dictReleaseIterator(hi
->di
); 
/* Zipmap encoding advances with zipmapNext, which fills zk/zklen and
 * zv/zvlen; otherwise the dict iterator is advanced and the entry is kept
 * in hi->de. */
6378 /* Move to the next entry in the hash. Return REDIS_OK when the next entry 
6379  * could be found and REDIS_ERR when the iterator reaches the end. */ 
6380 static int hashNext(hashIterator 
*hi
) { 
6381     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6382         if ((hi
->zi 
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
, 
6383             &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
; 
6385         if ((hi
->de 
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
; 
/* 'what' is tested against REDIS_HASH_KEY: when that bit is set the field
 * is returned, otherwise the value. For a zipmap-encoded hash the result
 * is a new string object built from the iterator's current pointers; for a
 * dict-encoded hash it is the object stored in the current entry. */
6390 /* Get key or value object at current iteration position. 
6391  * This increases the refcount of the field object by 1. */ 
6392 static robj 
*hashCurrent(hashIterator 
*hi
, int what
) { 
6394     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6395         if (what 
& REDIS_HASH_KEY
) { 
6396             o 
= createStringObject((char*)hi
->zk
,hi
->zklen
); 
6398             o 
= createStringObject((char*)hi
->zv
,hi
->zvlen
); 
6401         if (what 
& REDIS_HASH_KEY
) { 
6402             o 
= dictGetEntryKey(hi
->de
); 
6404             o 
= dictGetEntryVal(hi
->de
); 
/* Looks up 'key' for writing. A new hash object is created with
 * createHashObject and added to the database dict (presumably when the
 * lookup finds nothing; that condition is not visible in this listing).
 * An existing object whose type is not REDIS_HASH triggers the wrong-type
 * error reply; callers in this file treat a NULL return as "reply already
 * sent" and return immediately. */
6411 static robj 
*hashLookupWriteOrCreate(redisClient 
*c
, robj 
*key
) { 
6412     robj 
*o 
= lookupKeyWrite(c
->db
,key
); 
6414         o 
= createHashObject(); 
6415         dictAdd(c
->db
->dict
,key
,o
); 
6418         if (o
->type 
!= REDIS_HASH
) { 
6419             addReply(c
,shared
.wrongtypeerr
); 
6426 /* ============================= Hash commands ============================== */ 
/* HSET: stores field argv[2] with value argv[3] in the hash at argv[1],
 * creating the hash if needed. Before the write the hash may be converted
 * from zipmap (hashTryConversion) and the arguments may be re-encoded
 * (hashTryObjectEncoding). Replies shared.czero when hashSet reports an
 * update and shared.cone otherwise. */
6427 static void hsetCommand(redisClient 
*c
) { 
6431     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6432     hashTryConversion(o
,c
->argv
,2,3); 
6433     hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
6434     update 
= hashSet(o
,c
->argv
[2],c
->argv
[3]); 
6435     addReply(c
, update 
? shared
.czero 
: shared
.cone
); 
/* HSETNX: sets field argv[2] to argv[3] in the hash at argv[1] only when
 * the field does not exist yet. Replies shared.czero when hashExists
 * reports the field as present, otherwise stores the pair and replies
 * shared.cone. */
6439 static void hsetnxCommand(redisClient 
*c
) { 
6441     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6442     hashTryConversion(o
,c
->argv
,2,3); 
6444     if (hashExists(o
, c
->argv
[2])) { 
6445         addReply(c
, shared
.czero
); 
6447         hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
6448         hashSet(o
,c
->argv
[2],c
->argv
[3]); 
6449         addReply(c
, shared
.cone
); 
/* HMSET: stores the field/value pairs argv[2],argv[3], argv[4],argv[5],
 * ... in the hash at argv[1]. An odd argc means a field without a value
 * and produces the "wrong number of arguments" error. Replies shared.ok
 * after all pairs have been written. */
6454 static void hmsetCommand(redisClient 
*c
) { 
6458     if ((c
->argc 
% 2) == 1) { 
6459         addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n")); 
6463     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6464     hashTryConversion(o
,c
->argv
,2,c
->argc
-1); 
6465     for (i 
= 2; i 
< c
->argc
; i 
+= 2) { 
6466         hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]); 
6467         hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]); 
6469     addReply(c
, shared
.ok
); 
/* HINCRBY: argv[3] is parsed as the long long increment for field argv[2]
 * of the hash at argv[1]. When the field already exists its current value
 * must parse as a long long, otherwise the "hash value is not an integer"
 * error is sent. The resulting 'value' is stored as a new string object
 * and sent as the reply.
 * NOTE(review): the statements that combine 'value' with 'incr' and that
 * initialise 'value' for a missing field are not visible in this listing. */
6473 static void hincrbyCommand(redisClient 
*c
) { 
6474     long long value
, incr
; 
6475     robj 
*o
, *current
, *new; 
6477     if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return; 
6478     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6479     if ((current 
= hashGet(o
,c
->argv
[2])) != NULL
) { 
6480         if (getLongLongFromObjectOrReply(c
,current
,&value
, 
6481             "hash value is not an integer") != REDIS_OK
) { 
            /* hashGet returned a counted reference; drop it on this path too. */
6482             decrRefCount(current
); 
6485         decrRefCount(current
); 
6491     new = createStringObjectFromLongLong(value
); 
6492     hashTryObjectEncoding(o
,&c
->argv
[2],NULL
); 
6493     hashSet(o
,c
->argv
[2],new); 
6495     addReplyLongLong(c
,value
); 
/* HGET: replies with the value of field argv[2] in the hash at argv[1] as
 * a bulk reply, releasing the reference returned by hashGet afterwards.
 * shared.nullbulk is used both for a missing key (via
 * lookupKeyReadOrReply) and in the visible non-found branch. */
6499 static void hgetCommand(redisClient 
*c
) { 
6501     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6502         checkType(c
,o
,REDIS_HASH
)) return; 
6504     if ((value 
= hashGet(o
,c
->argv
[2])) != NULL
) { 
6505         addReplyBulk(c
,value
); 
6506         decrRefCount(value
); 
6508         addReply(c
,shared
.nullbulk
); 
/* HMGET: replies with a multi-bulk of argc-2 items, one per requested
 * field argv[2..argc-1] of the hash at argv[1]. A key holding a non-hash
 * object yields the wrong-type error. */
6512 static void hmgetCommand(redisClient 
*c
) { 
6515     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6516     if (o 
!= NULL 
&& o
->type 
!= REDIS_HASH
) { 
6517         addReply(c
,shared
.wrongtypeerr
); 
6520     /* Note the check for o != NULL happens inside the loop. This is 
6521      * done because objects that cannot be found are considered to be 
6522      * an empty hash. The reply should then be a series of NULLs. */ 
6523     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2)); 
6524     for (i 
= 2; i 
< c
->argc
; i
++) { 
6525         if (o 
!= NULL 
&& (value 
= hashGet(o
,c
->argv
[i
])) != NULL
) { 
6526             addReplyBulk(c
,value
); 
6527             decrRefCount(value
); 
6529             addReply(c
,shared
.nullbulk
); 
/* HDEL: removes field argv[2] from the hash at argv[1]. When hashDelete
 * reports a removal the key itself is deleted if the hash became empty and
 * shared.cone is sent; shared.czero is the other visible reply. */
6534 static void hdelCommand(redisClient 
*c
) { 
6536     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6537         checkType(c
,o
,REDIS_HASH
)) return; 
6539     if (hashDelete(o
,c
->argv
[2])) { 
6540         if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
6541         addReply(c
,shared
.cone
); 
6544         addReply(c
,shared
.czero
); 
/* HLEN: replies with hashLength() of the hash at argv[1]. Returns early
 * when the lookup yields NULL (lookupKeyReadOrReply is given shared.czero
 * as its reply) or when checkType rejects the object. */
6548 static void hlenCommand(redisClient 
*c
) { 
6550     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6551         checkType(c
,o
,REDIS_HASH
)) return; 
6553     addReplyUlong(c
,hashLength(o
)); 
/* Shared implementation of HKEYS, HVALS and HGETALL. 'flags' is a bitmask
 * of REDIS_HASH_KEY and REDIS_HASH_VALUE selecting what is emitted for
 * each entry of the hash at argv[1].
 *
 * 'lenobj' is created with a NULL payload before the iteration and only
 * receives its "*<count>\r\n" text once the loop has finished.
 * NOTE(review): presumably lenobj is queued in the reply before the
 * decrRefCount and 'count' is incremented per emitted object; neither
 * statement is visible in this listing. */
6556 static void genericHgetallCommand(redisClient 
*c
, int flags
) { 
6557     robj 
*o
, *lenobj
, *obj
; 
6558     unsigned long count 
= 0; 
6561     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
6562         || checkType(c
,o
,REDIS_HASH
)) return; 
6564     lenobj 
= createObject(REDIS_STRING
,NULL
); 
6566     decrRefCount(lenobj
); 
6568     hi 
= hashInitIterator(o
); 
6569     while (hashNext(hi
) != REDIS_ERR
) { 
6570         if (flags 
& REDIS_HASH_KEY
) { 
6571             obj 
= hashCurrent(hi
,REDIS_HASH_KEY
); 
6572             addReplyBulk(c
,obj
); 
6576         if (flags 
& REDIS_HASH_VALUE
) { 
6577             obj 
= hashCurrent(hi
,REDIS_HASH_VALUE
); 
6578             addReplyBulk(c
,obj
); 
6583     hashReleaseIterator(hi
); 
6585     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",count
); 
/* HKEYS entry point: genericHgetallCommand emitting fields only. */
6588 static void hkeysCommand(redisClient 
*c
) { 
6589     genericHgetallCommand(c
,REDIS_HASH_KEY
); 
/* HVALS entry point: genericHgetallCommand emitting values only. */
6592 static void hvalsCommand(redisClient 
*c
) { 
6593     genericHgetallCommand(c
,REDIS_HASH_VALUE
); 
/* HGETALL entry point: genericHgetallCommand emitting both fields and
 * values. */
6596 static void hgetallCommand(redisClient 
*c
) { 
6597     genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
); 
/* HEXISTS: replies shared.cone when field argv[2] exists in the hash at
 * argv[1] and shared.czero otherwise (a missing key also replies
 * shared.czero through lookupKeyReadOrReply). */
6600 static void hexistsCommand(redisClient 
*c
) { 
6602     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6603         checkType(c
,o
,REDIS_HASH
)) return; 
6605     addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone 
: shared
.czero
); 
/* Converts the hash object 'o' to REDIS_ENCODING_HT.
 *
 * A new dict of type hashDictType is created and every field/value pair of
 * the zipmap in o->ptr is copied into it as string objects passed through
 * tryObjectEncoding. The object must be a hash that is not already
 * dict-encoded (checked with assert).
 * NOTE(review): installing the new dict in o->ptr and freeing the old
 * zipmap are not visible in this listing. */
6608 static void convertToRealHash(robj 
*o
) { 
6609     unsigned char *key
, *val
, *p
, *zm 
= o
->ptr
; 
6610     unsigned int klen
, vlen
; 
6611     dict 
*dict 
= dictCreate(&hashDictType
,NULL
); 
6613     assert(o
->type 
== REDIS_HASH 
&& o
->encoding 
!= REDIS_ENCODING_HT
); 
6614     p 
= zipmapRewind(zm
); 
6615     while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
6616         robj 
*keyobj
, *valobj
; 
6618         keyobj 
= createStringObject((char*)key
,klen
); 
6619         valobj 
= createStringObject((char*)val
,vlen
); 
6620         keyobj 
= tryObjectEncoding(keyobj
); 
6621         valobj 
= tryObjectEncoding(valobj
); 
6622         dictAdd(dict
,keyobj
,valobj
); 
6624     o
->encoding 
= REDIS_ENCODING_HT
; 
6629 /* ========================= Non type-specific commands  ==================== */ 
/* FLUSHDB: empties both the key dict and the expires dict of the client's
 * current database. server.dirty is increased by the number of keys that
 * were in the dict before it was emptied. Replies shared.ok. */
6631 static void flushdbCommand(redisClient 
*c
) { 
6632     server
.dirty 
+= dictSize(c
->db
->dict
); 
6633     dictEmpty(c
->db
->dict
); 
6634     dictEmpty(c
->db
->expires
); 
6635     addReply(c
,shared
.ok
); 
/* FLUSHALL: calls emptyDb(), adding its return value to server.dirty, and
 * replies shared.ok. If a background save child is recorded
 * (server.bgsavechildpid != -1) it is sent SIGKILL and its temp file is
 * removed; afterwards the dataset is saved with rdbSave to
 * server.dbfilename. */
6638 static void flushallCommand(redisClient 
*c
) { 
6639     server
.dirty 
+= emptyDb(); 
6640     addReply(c
,shared
.ok
); 
6641     if (server
.bgsavechildpid 
!= -1) { 
6642         kill(server
.bgsavechildpid
,SIGKILL
); 
6643         rdbRemoveTempFile(server
.bgsavechildpid
); 
6645     rdbSave(server
.dbfilename
); 
/* Allocates a redisSortOperation with zmalloc and stores 'pattern' in it.
 * The pattern pointer is stored as given; no refcount change is visible.
 * NOTE(review): the assignment of 'type' and the return statement are not
 * visible in this listing. */
6649 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
6650     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
6652     so
->pattern 
= pattern
; 
/* Besides plain keys, a pattern of the form "prefix*postfix->field" is
 * treated as a hash dereference: the part after "->" is the field fetched
 * with hashGet from the hash stored at the substituted key. The key and
 * field names are assembled in stack buffers of REDIS_SORTKEY_MAX+1 bytes.
 *
 * NOTE(review): the length check returns NULL after
 * getDecodedObject(subst) without a visible decrRefCount(subst), unlike
 * the other exits; this looks like a reference leak on over-long patterns
 * -- confirm against the full source. */
6656 /* Return the value associated to the key with a name obtained 
6657  * substituting the first occurrence of '*' in 'pattern' with 'subst'. 
6658  * The returned object will always have its refcount increased by 1 
6659  * when it is non-NULL. */ 
6660 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
6663     robj keyobj
, fieldobj
, *o
; 
6664     int prefixlen
, sublen
, postfixlen
, fieldlen
; 
6665     /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
6669         char buf
[REDIS_SORTKEY_MAX
+1]; 
6670     } keyname
, fieldname
; 
6672     /* If the pattern is "#" return the substitution object itself in order 
6673      * to implement the "SORT ... GET #" feature. */ 
6674     spat 
= pattern
->ptr
; 
6675     if (spat
[0] == '#' && spat
[1] == '\0') { 
6676         incrRefCount(subst
); 
6680     /* The substitution object may be specially encoded. If so we create 
6681      * a decoded object on the fly. Otherwise getDecodedObject will just 
6682      * increment the ref count, that we'll decrement later. */ 
6683     subst 
= getDecodedObject(subst
); 
6686     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
6687     p 
= strchr(spat
,'*'); 
6689         decrRefCount(subst
); 
6693     /* Find out if we're dealing with a hash dereference. */ 
6694     if ((f 
= strstr(p
+1, "->")) != NULL
) { 
6695         fieldlen 
= sdslen(spat
)-(f
-spat
); 
6696         /* this also copies \0 character */ 
6697         memcpy(fieldname
.buf
,f
+2,fieldlen
-1); 
6698         fieldname
.len 
= fieldlen
-2; 
6704     sublen 
= sdslen(ssub
); 
6705     postfixlen 
= sdslen(spat
)-(prefixlen
+1)-fieldlen
; 
6706     memcpy(keyname
.buf
,spat
,prefixlen
); 
6707     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
6708     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
6709     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
6710     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
6711     decrRefCount(subst
); 
6713     /* Lookup substituted key */ 
6714     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)); 
6715     o 
= lookupKeyRead(db
,&keyobj
); 
6716     if (o 
== NULL
) return NULL
; 
6719         if (o
->type 
!= REDIS_HASH 
|| fieldname
.len 
< 1) return NULL
; 
6721         /* Retrieve value from hash by the field name. This operation 
6722          * already increases the refcount of the returned object. */ 
6723         initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2)); 
6724         o 
= hashGet(o
, &fieldobj
); 
6726         if (o
->type 
!= REDIS_STRING
) return NULL
; 
6728         /* Every object that this function returns needs to have its refcount 
6729          * increased. sortCommand decreases it again. */ 
6736 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
6737  * the additional parameter is not standard but a BSD-specific we have to 
6738  * pass sorting parameters via the global 'server' structure */ 
6739 static int sortCompare(const void *s1
, const void *s2
) { 
6740     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
6743     if (!server
.sort_alpha
) { 
6744         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
6745         if (so1
->u
.score 
> so2
->u
.score
) { 
6747         } else if (so1
->u
.score 
< so2
->u
.score
) { 
6753         /* Alphanumeric sorting */ 
6754         if (server
.sort_bypattern
) { 
6755             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
6756                 /* At least one compare object is NULL */ 
6757                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
6759                 else if (so1
->u
.cmpobj 
== NULL
) 
6764                 /* We have both the objects, use strcoll */ 
6765                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
6768             /* Compare elements directly. */ 
6769             cmp 
= compareStringObjects(so1
->obj
,so2
->obj
); 
6772     return server
.sort_desc 
? -cmp 
: cmp
; 
6775 /* The SORT command is the most complex command in Redis. Warning: this code 
6776  * is optimized for speed and a bit less for readability */ 
6777 static void sortCommand(redisClient 
*c
) { 
6780     int desc 
= 0, alpha 
= 0; 
6781     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
6782     int j
, dontsort 
= 0, vectorlen
; 
6783     int getop 
= 0; /* GET operation counter */ 
6784     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
6785     redisSortObject 
*vector
; /* Resulting vector to sort */ 
6787     /* Lookup the key to sort. It must be of the right types */ 
6788     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6789     if (sortval 
== NULL
) { 
6790         addReply(c
,shared
.emptymultibulk
); 
6793     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
6794         sortval
->type 
!= REDIS_ZSET
) 
6796         addReply(c
,shared
.wrongtypeerr
); 
6800     /* Create a list of operations to perform for every sorted element. 
6801      * Operations can be GET/DEL/INCR/DECR */ 
6802     operations 
= listCreate(); 
6803     listSetFreeMethod(operations
,zfree
); 
6806     /* Now we need to protect sortval incrementing its count, in the future 
6807      * SORT may have options able to overwrite/delete keys during the sorting 
6808      * and the sorted key itself may get destroyed */ 
6809     incrRefCount(sortval
); 
6811     /* The SORT command has an SQL-alike syntax, parse it */ 
6812     while(j 
< c
->argc
) { 
6813         int leftargs 
= c
->argc
-j
-1; 
6814         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
6816         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
6818         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
6820         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
6821             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
6822             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
6824         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
6825             storekey 
= c
->argv
[j
+1]; 
6827         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
6828             sortby 
= c
->argv
[j
+1]; 
6829             /* If the BY pattern does not contain '*', i.e. it is constant, 
6830              * we don't need to sort nor to lookup the weight keys. */ 
6831             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
6833         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
6834             listAddNodeTail(operations
,createSortOperation( 
6835                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
6839             decrRefCount(sortval
); 
6840             listRelease(operations
); 
6841             addReply(c
,shared
.syntaxerr
); 
6847     /* Load the sorting vector with all the objects to sort */ 
6848     switch(sortval
->type
) { 
6849     case REDIS_LIST
: vectorlen 
= listLength((list
*)sortval
->ptr
); break; 
6850     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
6851     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
6852     default: vectorlen 
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */ 
6854     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
6857     if (sortval
->type 
== REDIS_LIST
) { 
6858         list 
*list 
= sortval
->ptr
; 
6862         listRewind(list
,&li
); 
6863         while((ln 
= listNext(&li
))) { 
6864             robj 
*ele 
= ln
->value
; 
6865             vector
[j
].obj 
= ele
; 
6866             vector
[j
].u
.score 
= 0; 
6867             vector
[j
].u
.cmpobj 
= NULL
; 
6875         if (sortval
->type 
== REDIS_SET
) { 
6878             zset 
*zs 
= sortval
->ptr
; 
6882         di 
= dictGetIterator(set
); 
6883         while((setele 
= dictNext(di
)) != NULL
) { 
6884             vector
[j
].obj 
= dictGetEntryKey(setele
); 
6885             vector
[j
].u
.score 
= 0; 
6886             vector
[j
].u
.cmpobj 
= NULL
; 
6889         dictReleaseIterator(di
); 
6891     redisAssert(j 
== vectorlen
); 
6893     /* Now it's time to load the right scores in the sorting vector */ 
6894     if (dontsort 
== 0) { 
6895         for (j 
= 0; j 
< vectorlen
; j
++) { 
6898                 /* lookup value to sort by */ 
6899                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
6900                 if (!byval
) continue; 
6902                 /* use object itself to sort by */ 
6903                 byval 
= vector
[j
].obj
; 
6907                 if (sortby
) vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
6909                 if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
6910                     vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
6911                 } else if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
6912                     /* Don't need to decode the object if it's 
6913                      * integer-encoded (the only encoding supported) so 
6914                      * far. We can just cast it */ 
6915                     vector
[j
].u
.score 
= (long)byval
->ptr
; 
6917                     redisAssert(1 != 1); 
6921             /* when the object was retrieved using lookupKeyByPattern, 
6922              * its refcount needs to be decreased. */ 
6924                 decrRefCount(byval
); 
6929     /* We are ready to sort the vector... perform a bit of sanity check 
6930      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
6931     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
6932     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
6933     if (start 
>= vectorlen
) { 
6934         start 
= vectorlen
-1; 
6937     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
6939     if (dontsort 
== 0) { 
6940         server
.sort_desc 
= desc
; 
6941         server
.sort_alpha 
= alpha
; 
6942         server
.sort_bypattern 
= sortby 
? 1 : 0; 
6943         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
6944             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
6946             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
6949     /* Send command output to the output buffer, performing the specified 
6950      * GET/DEL/INCR/DECR operations if any. */ 
6951     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
6952     if (storekey 
== NULL
) { 
6953         /* STORE option not specified, send the sorting result to client */ 
6954         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
6955         for (j 
= start
; j 
<= end
; j
++) { 
6959             if (!getop
) addReplyBulk(c
,vector
[j
].obj
); 
6960             listRewind(operations
,&li
); 
6961             while((ln 
= listNext(&li
))) { 
6962                 redisSortOperation 
*sop 
= ln
->value
; 
6963                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6966                 if (sop
->type 
== REDIS_SORT_GET
) { 
6968                         addReply(c
,shared
.nullbulk
); 
6970                         addReplyBulk(c
,val
); 
6974                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
6979         robj 
*listObject 
= createListObject(); 
6980         list 
*listPtr 
= (list
*) listObject
->ptr
; 
6982         /* STORE option specified, set the sorting result as a List object */ 
6983         for (j 
= start
; j 
<= end
; j
++) { 
6988                 listAddNodeTail(listPtr
,vector
[j
].obj
); 
6989                 incrRefCount(vector
[j
].obj
); 
6991             listRewind(operations
,&li
); 
6992             while((ln 
= listNext(&li
))) { 
6993                 redisSortOperation 
*sop 
= ln
->value
; 
6994                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
6997                 if (sop
->type 
== REDIS_SORT_GET
) { 
6999                         listAddNodeTail(listPtr
,createStringObject("",0)); 
7001                         /* We should do a incrRefCount on val because it is 
7002                          * added to the list, but also a decrRefCount because 
7003                          * it is returned by lookupKeyByPattern. This results 
7004                          * in doing nothing at all. */ 
7005                         listAddNodeTail(listPtr
,val
); 
7008                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
7012         if (dictReplace(c
->db
->dict
,storekey
,listObject
)) { 
7013             incrRefCount(storekey
); 
7015         /* Note: we add 1 because the DB is dirty anyway since even if the 
7016          * SORT result is empty a new key is set and maybe the old content 
7018         server
.dirty 
+= 1+outputlen
; 
7019         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
7023     decrRefCount(sortval
); 
7024     listRelease(operations
); 
7025     for (j 
= 0; j 
< vectorlen
; j
++) { 
7026         if (alpha 
&& vector
[j
].u
.cmpobj
) 
7027             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' receives the NUL-terminated result. Values below 1024 are printed
 * as an integer followed by 'B'; larger values are scaled by powers of
 * 1024 and printed with two decimals followed by K, M, G or T. Everything
 * from 1 TiB up is expressed in T, so the buffer is always written (the
 * longest possible output, for the maximum unsigned long long, is
 * "16777216.00T": 13 bytes including the terminator). */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else {
        /* 1 TiB and above: without this branch 's' would be left
         * untouched and the caller would print garbage. */
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    }
}

7053 /* Create the string returned by the INFO command. This is decoupled 
7054  * from the INFO command itself as we need to report the same information 
7055  * on memory corruption problems. */ 
7056 static sds 
genRedisInfoString(void) { 
7058     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
7062     bytesToHuman(hmem
,zmalloc_used_memory()); 
7063     info 
= sdscatprintf(sdsempty(), 
7064         "redis_version:%s\r\n" 
7066         "multiplexing_api:%s\r\n" 
7067         "process_id:%ld\r\n" 
7068         "uptime_in_seconds:%ld\r\n" 
7069         "uptime_in_days:%ld\r\n" 
7070         "connected_clients:%d\r\n" 
7071         "connected_slaves:%d\r\n" 
7072         "blocked_clients:%d\r\n" 
7073         "used_memory:%zu\r\n" 
7074         "used_memory_human:%s\r\n" 
7075         "changes_since_last_save:%lld\r\n" 
7076         "bgsave_in_progress:%d\r\n" 
7077         "last_save_time:%ld\r\n" 
7078         "bgrewriteaof_in_progress:%d\r\n" 
7079         "total_connections_received:%lld\r\n" 
7080         "total_commands_processed:%lld\r\n" 
7081         "expired_keys:%lld\r\n" 
7082         "hash_max_zipmap_entries:%ld\r\n" 
7083         "hash_max_zipmap_value:%ld\r\n" 
7084         "pubsub_channels:%ld\r\n" 
7085         "pubsub_patterns:%u\r\n" 
7089         (sizeof(long) == 8) ? "64" : "32", 
7094         listLength(server
.clients
)-listLength(server
.slaves
), 
7095         listLength(server
.slaves
), 
7096         server
.blpop_blocked_clients
, 
7097         zmalloc_used_memory(), 
7100         server
.bgsavechildpid 
!= -1, 
7102         server
.bgrewritechildpid 
!= -1, 
7103         server
.stat_numconnections
, 
7104         server
.stat_numcommands
, 
7105         server
.stat_expiredkeys
, 
7106         server
.hash_max_zipmap_entries
, 
7107         server
.hash_max_zipmap_value
, 
7108         dictSize(server
.pubsub_channels
), 
7109         listLength(server
.pubsub_patterns
), 
7110         server
.vm_enabled 
!= 0, 
7111         server
.masterhost 
== NULL 
? "master" : "slave" 
7113     if (server
.masterhost
) { 
7114         info 
= sdscatprintf(info
, 
7115             "master_host:%s\r\n" 
7116             "master_port:%d\r\n" 
7117             "master_link_status:%s\r\n" 
7118             "master_last_io_seconds_ago:%d\r\n" 
7121             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
7123             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
7126     if (server
.vm_enabled
) { 
7128         info 
= sdscatprintf(info
, 
7129             "vm_conf_max_memory:%llu\r\n" 
7130             "vm_conf_page_size:%llu\r\n" 
7131             "vm_conf_pages:%llu\r\n" 
7132             "vm_stats_used_pages:%llu\r\n" 
7133             "vm_stats_swapped_objects:%llu\r\n" 
7134             "vm_stats_swappin_count:%llu\r\n" 
7135             "vm_stats_swappout_count:%llu\r\n" 
7136             "vm_stats_io_newjobs_len:%lu\r\n" 
7137             "vm_stats_io_processing_len:%lu\r\n" 
7138             "vm_stats_io_processed_len:%lu\r\n" 
7139             "vm_stats_io_active_threads:%lu\r\n" 
7140             "vm_stats_blocked_clients:%lu\r\n" 
7141             ,(unsigned long long) server
.vm_max_memory
, 
7142             (unsigned long long) server
.vm_page_size
, 
7143             (unsigned long long) server
.vm_pages
, 
7144             (unsigned long long) server
.vm_stats_used_pages
, 
7145             (unsigned long long) server
.vm_stats_swapped_objects
, 
7146             (unsigned long long) server
.vm_stats_swapins
, 
7147             (unsigned long long) server
.vm_stats_swapouts
, 
7148             (unsigned long) listLength(server
.io_newjobs
), 
7149             (unsigned long) listLength(server
.io_processing
), 
7150             (unsigned long) listLength(server
.io_processed
), 
7151             (unsigned long) server
.io_active_threads
, 
7152             (unsigned long) server
.vm_blocked_clients
 
7156     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7157         long long keys
, vkeys
; 
7159         keys 
= dictSize(server
.db
[j
].dict
); 
7160         vkeys 
= dictSize(server
.db
[j
].expires
); 
7161         if (keys 
|| vkeys
) { 
7162             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
7169 static void infoCommand(redisClient 
*c
) { 
7170     sds info 
= genRedisInfoString(); 
7171     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
7172         (unsigned long)sdslen(info
))); 
7173     addReplySds(c
,info
); 
7174     addReply(c
,shared
.crlf
); 
7177 static void monitorCommand(redisClient 
*c
) { 
7178     /* ignore MONITOR if already slave or in monitor mode */ 
7179     if (c
->flags 
& REDIS_SLAVE
) return; 
7181     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
7183     listAddNodeTail(server
.monitors
,c
); 
7184     addReply(c
,shared
.ok
); 
7187 /* ================================= Expire ================================= */ 
7188 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
7189     if (dictDelete(db
->expires
,key
) == DICT_OK
) { 
7196 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
7197     if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) { 
7205 /* Return the expire time of the specified key, or -1 if no expire 
7206  * is associated with this key (i.e. the key is non volatile) */ 
7207 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
7210     /* No expire? return ASAP */ 
7211     if (dictSize(db
->expires
) == 0 || 
7212        (de 
= dictFind(db
->expires
,key
)) == NULL
) return -1; 
7214     return (time_t) dictGetEntryVal(de
); 
7217 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
7221     /* No expire? return ASAP */ 
7222     if (dictSize(db
->expires
) == 0 || 
7223        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
7225     /* Lookup the expire */ 
7226     when 
= (time_t) dictGetEntryVal(de
); 
7227     if (time(NULL
) <= when
) return 0; 
7229     /* Delete the key */ 
7230     dictDelete(db
->expires
,key
); 
7231     server
.stat_expiredkeys
++; 
7232     return dictDelete(db
->dict
,key
) == DICT_OK
; 
7235 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
7238     /* No expire? return ASAP */ 
7239     if (dictSize(db
->expires
) == 0 || 
7240        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
7242     /* Delete the key */ 
7244     server
.stat_expiredkeys
++; 
7245     dictDelete(db
->expires
,key
); 
7246     return dictDelete(db
->dict
,key
) == DICT_OK
; 
7249 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*param
, long offset
) { 
7253     if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return; 
7257     de 
= dictFind(c
->db
->dict
,key
); 
7259         addReply(c
,shared
.czero
); 
7263         if (deleteKey(c
->db
,key
)) server
.dirty
++; 
7264         addReply(c
, shared
.cone
); 
7267         time_t when 
= time(NULL
)+seconds
; 
7268         if (setExpire(c
->db
,key
,when
)) { 
7269             addReply(c
,shared
.cone
); 
7272             addReply(c
,shared
.czero
); 
7278 static void expireCommand(redisClient 
*c
) { 
7279     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0); 
7282 static void expireatCommand(redisClient 
*c
) { 
7283     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
)); 
7286 static void ttlCommand(redisClient 
*c
) { 
7290     expire 
= getExpire(c
->db
,c
->argv
[1]); 
7292         ttl 
= (int) (expire
-time(NULL
)); 
7293         if (ttl 
< 0) ttl 
= -1; 
7295     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
7298 /* ================================ MULTI/EXEC ============================== */ 
7300 /* Client state initialization for MULTI/EXEC */ 
7301 static void initClientMultiState(redisClient 
*c
) { 
7302     c
->mstate
.commands 
= NULL
; 
7303     c
->mstate
.count 
= 0; 
7306 /* Release all the resources associated with MULTI/EXEC state */ 
7307 static void freeClientMultiState(redisClient 
*c
) { 
7310     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7312         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
7314         for (i 
= 0; i 
< mc
->argc
; i
++) 
7315             decrRefCount(mc
->argv
[i
]); 
7318     zfree(c
->mstate
.commands
); 
7321 /* Add a new command into the MULTI commands queue */ 
7322 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
7326     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
7327             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
7328     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
7331     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
7332     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
7333     for (j 
= 0; j 
< c
->argc
; j
++) 
7334         incrRefCount(mc
->argv
[j
]); 
7338 static void multiCommand(redisClient 
*c
) { 
7339     c
->flags 
|= REDIS_MULTI
; 
7340     addReply(c
,shared
.ok
); 
7343 static void discardCommand(redisClient 
*c
) { 
7344     if (!(c
->flags 
& REDIS_MULTI
)) { 
7345         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
7349     freeClientMultiState(c
); 
7350     initClientMultiState(c
); 
7351     c
->flags 
&= (~REDIS_MULTI
); 
7352     addReply(c
,shared
.ok
); 
7355 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand 
7356  * implememntation for more information. */ 
7357 static void execCommandReplicateMulti(redisClient 
*c
) { 
7358     struct redisCommand 
*cmd
; 
7359     robj 
*multistring 
= createStringObject("MULTI",5); 
7361     cmd 
= lookupCommand("multi"); 
7362     if (server
.appendonly
) 
7363         feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1); 
7364     if (listLength(server
.slaves
)) 
7365         replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1); 
7366     decrRefCount(multistring
); 
7369 static void execCommand(redisClient 
*c
) { 
7374     if (!(c
->flags 
& REDIS_MULTI
)) { 
7375         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
7379     /* Replicate a MULTI request now that we are sure the block is executed. 
7380      * This way we'll deliver the MULTI/..../EXEC block as a whole and 
7381      * both the AOF and the replication link will have the same consistency 
7382      * and atomicity guarantees. */ 
7383     execCommandReplicateMulti(c
); 
7385     /* Exec all the queued commands */ 
7386     orig_argv 
= c
->argv
; 
7387     orig_argc 
= c
->argc
; 
7388     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
7389     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7390         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
7391         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
7392         call(c
,c
->mstate
.commands
[j
].cmd
); 
7394     c
->argv 
= orig_argv
; 
7395     c
->argc 
= orig_argc
; 
7396     freeClientMultiState(c
); 
7397     initClientMultiState(c
); 
7398     c
->flags 
&= (~REDIS_MULTI
); 
7399     /* Make sure the EXEC command is always replicated / AOF, since we 
7400      * always send the MULTI command (we can't know beforehand if the 
7401      * next operations will contain at least a modification to the DB). */ 
7405 /* =========================== Blocking Operations  ========================= */ 
7407 /* Currently Redis blocking operations support is limited to list POP ops, 
7408  * so the current implementation is not fully generic, but it is also not 
7409  * completely specific so it will not require a rewrite to support new 
7410  * kind of blocking operations in the future. 
7412  * Still it's important to note that list blocking operations can be already 
7413  * used as a notification mechanism in order to implement other blocking 
7414  * operations at application level, so there must be a very strong evidence 
7415  * of usefulness and generality before new blocking operations are implemented. 
7417  * This is how the current blocking POP works, we use BLPOP as example: 
7418  * - If the user calls BLPOP and the key exists and contains a non empty list 
7419  *   then LPOP is called instead. So BLPOP is semantically the same as LPOP 
7420  *   if there is no need to block. 
7421  * - If instead BLPOP is called and the key does not exist or the list is 
7422  *   empty we need to block. In order to do so we remove the notification for 
7423  *   new data to read in the client socket (so that we'll not serve new 
7424  *   requests if the blocking request is not served). Also we put the client 
7425  *   in a dictionary (db->blockingkeys) mapping keys to a list of clients 
7426  *   blocking for these keys. 
7427  * - If a PUSH operation against a key with blocked clients waiting is 
7428  *   performed, we serve the first in the list: basically instead of pushing 
7429  *   the new element inside the list we return it to the (first / oldest) 
7430  *   blocking client, unblock the client, and remove it from the list. 
7432  * The above comment and the source code should be enough in order to understand 
7433  * the implementation and modify / fix it later. 
7436 /* Set a client in blocking mode for the specified key, with the specified 
7438 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
7443     c
->blockingkeys 
= zmalloc(sizeof(robj
*)*numkeys
); 
7444     c
->blockingkeysnum 
= numkeys
; 
7445     c
->blockingto 
= timeout
; 
7446     for (j 
= 0; j 
< numkeys
; j
++) { 
7447         /* Add the key in the client structure, to map clients -> keys */ 
7448         c
->blockingkeys
[j
] = keys
[j
]; 
7449         incrRefCount(keys
[j
]); 
7451         /* And in the other "side", to map keys -> clients */ 
7452         de 
= dictFind(c
->db
->blockingkeys
,keys
[j
]); 
7456             /* For every key we take a list of clients blocked for it */ 
7458             retval 
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
); 
7459             incrRefCount(keys
[j
]); 
7460             assert(retval 
== DICT_OK
); 
7462             l 
= dictGetEntryVal(de
); 
7464         listAddNodeTail(l
,c
); 
7466     /* Mark the client as a blocked client */ 
7467     c
->flags 
|= REDIS_BLOCKED
; 
7468     server
.blpop_blocked_clients
++; 
7471 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
7472 static void unblockClientWaitingData(redisClient 
*c
) { 
7477     assert(c
->blockingkeys 
!= NULL
); 
7478     /* The client may wait for multiple keys, so unblock it for every key. */ 
7479     for (j 
= 0; j 
< c
->blockingkeysnum
; j
++) { 
7480         /* Remove this client from the list of clients waiting for this key. */ 
7481         de 
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
7483         l 
= dictGetEntryVal(de
); 
7484         listDelNode(l
,listSearchKey(l
,c
)); 
7485         /* If the list is empty we need to remove it to avoid wasting memory */ 
7486         if (listLength(l
) == 0) 
7487             dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
7488         decrRefCount(c
->blockingkeys
[j
]); 
7490     /* Cleanup the client structure */ 
7491     zfree(c
->blockingkeys
); 
7492     c
->blockingkeys 
= NULL
; 
7493     c
->flags 
&= (~REDIS_BLOCKED
); 
7494     server
.blpop_blocked_clients
--; 
7495     /* We want to process data if there is some command waiting 
7496      * in the input buffer. Note that this is safe even if 
7497      * unblockClientWaitingData() gets called from freeClient() because 
7498      * freeClient() will be smart enough to call this function 
7499      * *after* c->querybuf was set to NULL. */ 
7500     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
7503 /* This should be called from any function PUSHing into lists. 
7504  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
7505  * 'ele' is the element pushed. 
7507  * If the function returns 0 there was no client waiting for a list push 
7510  * If the function returns 1 there was a client waiting for a list push 
7511  * against this key, the element was passed to this client thus it's not 
7512  * needed to actually add it to the list and the caller should return asap. */ 
7513 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
7514     struct dictEntry 
*de
; 
7515     redisClient 
*receiver
; 
7519     de 
= dictFind(c
->db
->blockingkeys
,key
); 
7520     if (de 
== NULL
) return 0; 
7521     l 
= dictGetEntryVal(de
); 
7524     receiver 
= ln
->value
; 
7526     addReplySds(receiver
,sdsnew("*2\r\n")); 
7527     addReplyBulk(receiver
,key
); 
7528     addReplyBulk(receiver
,ele
); 
7529     unblockClientWaitingData(receiver
); 
7533 /* Blocking RPOP/LPOP */ 
7534 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
7539     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
7540         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
7542             if (o
->type 
!= REDIS_LIST
) { 
7543                 addReply(c
,shared
.wrongtypeerr
); 
7546                 list 
*list 
= o
->ptr
; 
7547                 if (listLength(list
) != 0) { 
7548                     /* If the list contains elements fall back to the usual 
7549                      * non-blocking POP operation */ 
7550                     robj 
*argv
[2], **orig_argv
; 
7553                     /* We need to alter the command arguments before to call 
7554                      * popGenericCommand() as the command takes a single key. */ 
7555                     orig_argv 
= c
->argv
; 
7556                     orig_argc 
= c
->argc
; 
7557                     argv
[1] = c
->argv
[j
]; 
7561                     /* Also the return value is different, we need to output 
7562                      * the multi bulk reply header and the key name. The 
7563                      * "real" command will add the last element (the value) 
7564                      * for us. If this souds like an hack to you it's just 
7565                      * because it is... */ 
7566                     addReplySds(c
,sdsnew("*2\r\n")); 
7567                     addReplyBulk(c
,argv
[1]); 
7568                     popGenericCommand(c
,where
); 
7570                     /* Fix the client structure with the original stuff */ 
7571                     c
->argv 
= orig_argv
; 
7572                     c
->argc 
= orig_argc
; 
7578     /* If the list is empty or the key does not exists we must block */ 
7579     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
7580     if (timeout 
> 0) timeout 
+= time(NULL
); 
7581     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
7584 static void blpopCommand(redisClient 
*c
) { 
7585     blockingPopGenericCommand(c
,REDIS_HEAD
); 
7588 static void brpopCommand(redisClient 
*c
) { 
7589     blockingPopGenericCommand(c
,REDIS_TAIL
); 
7592 /* =============================== Replication  ============================= */ 
7594 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7595     ssize_t nwritten
, ret 
= size
; 
7596     time_t start 
= time(NULL
); 
7600         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
7601             nwritten 
= write(fd
,ptr
,size
); 
7602             if (nwritten 
== -1) return -1; 
7606         if ((time(NULL
)-start
) > timeout
) { 
7614 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7615     ssize_t nread
, totread 
= 0; 
7616     time_t start 
= time(NULL
); 
7620         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
7621             nread 
= read(fd
,ptr
,size
); 
7622             if (nread 
== -1) return -1; 
7627         if ((time(NULL
)-start
) > timeout
) { 
7635 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7642         if (syncRead(fd
,&c
,1,timeout
) == -1) return -1; 
7645             if (nread 
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0'; 
7656 static void syncCommand(redisClient 
*c
) { 
7657     /* ignore SYNC if already slave or in monitor mode */ 
7658     if (c
->flags 
& REDIS_SLAVE
) return; 
7660     /* SYNC can't be issued when the server has pending data to send to 
7661      * the client about already issued commands. We need a fresh reply 
7662      * buffer registering the differences between the BGSAVE and the current 
7663      * dataset, so that we can copy to other slaves if needed. */ 
7664     if (listLength(c
->reply
) != 0) { 
7665         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
7669     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
7670     /* Here we need to check if there is a background saving operation 
7671      * in progress, or if it is required to start one */ 
7672     if (server
.bgsavechildpid 
!= -1) { 
7673         /* Ok a background save is in progress. Let's check if it is a good 
7674          * one for replication, i.e. if there is another slave that is 
7675          * registering differences since the server forked to save */ 
7680         listRewind(server
.slaves
,&li
); 
7681         while((ln 
= listNext(&li
))) { 
7683             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
7686             /* Perfect, the server is already registering differences for 
7687              * another slave. Set the right state, and copy the buffer. */ 
7688             listRelease(c
->reply
); 
7689             c
->reply 
= listDup(slave
->reply
); 
7690             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7691             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
7693             /* No way, we need to wait for the next BGSAVE in order to 
7694              * register differences */ 
7695             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
7696             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
7699         /* Ok we don't have a BGSAVE in progress, let's start one */ 
7700         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
7701         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7702             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
7703             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
7706         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7709     c
->flags 
|= REDIS_SLAVE
; 
7711     listAddNodeTail(server
.slaves
,c
); 
7715 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
7716     redisClient 
*slave 
= privdata
; 
7718     REDIS_NOTUSED(mask
); 
7719     char buf
[REDIS_IOBUF_LEN
]; 
7720     ssize_t nwritten
, buflen
; 
7722     if (slave
->repldboff 
== 0) { 
7723         /* Write the bulk write count before to transfer the DB. In theory here 
7724          * we don't know how much room there is in the output buffer of the 
7725          * socket, but in pratice SO_SNDLOWAT (the minimum count for output 
7726          * operations) will never be smaller than the few bytes we need. */ 
7729         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
7731         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
7739     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
7740     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
7742         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
7743             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
7747     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
7748         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
7753     slave
->repldboff 
+= nwritten
; 
7754     if (slave
->repldboff 
== slave
->repldbsize
) { 
7755         close(slave
->repldbfd
); 
7756         slave
->repldbfd 
= -1; 
7757         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7758         slave
->replstate 
= REDIS_REPL_ONLINE
; 
7759         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
7760             sendReplyToClient
, slave
) == AE_ERR
) { 
7764         addReplySds(slave
,sdsempty()); 
7765         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
7769 /* This function is called at the end of every backgrond saving. 
7770  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
7771  * otherwise REDIS_ERR is passed to the function. 
7773  * The goal of this function is to handle slaves waiting for a successful 
7774  * background saving in order to perform non-blocking synchronization. */ 
7775 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
7777     int startbgsave 
= 0; 
7780     listRewind(server
.slaves
,&li
); 
7781     while((ln 
= listNext(&li
))) { 
7782         redisClient 
*slave 
= ln
->value
; 
7784         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
7786             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7787         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
7788             struct redis_stat buf
; 
7790             if (bgsaveerr 
!= REDIS_OK
) { 
7792                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
7795             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
7796                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
7798                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
7801             slave
->repldboff 
= 0; 
7802             slave
->repldbsize 
= buf
.st_size
; 
7803             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
7804             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7805             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
7812         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7815             listRewind(server
.slaves
,&li
); 
7816             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
7817             while((ln 
= listNext(&li
))) { 
7818                 redisClient 
*slave 
= ln
->value
; 
7820                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
7827 static int syncWithMaster(void) { 
7828     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
7830     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
7831     int dfd
, maxtries 
= 5; 
7834         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
7839     /* AUTH with the master if required. */ 
7840     if(server
.masterauth
) { 
7841         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
7842         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
7844             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
7848         /* Read the AUTH result.  */ 
7849         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7851             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
7855         if (buf
[0] != '+') { 
7857             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
7862     /* Issue the SYNC command */ 
7863     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
7865         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
7869     /* Read the bulk write count */ 
7870     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7872         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
7876     if (buf
[0] != '$') { 
7878         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
7881     dumpsize 
= strtol(buf
+1,NULL
,10); 
7882     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
7883     /* Read the bulk write data on a temp file */ 
7885         snprintf(tmpfile
,256, 
7886             "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid()); 
7887         dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644); 
7888         if (dfd 
!= -1) break; 
7893         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
7897         int nread
, nwritten
; 
7899         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
7901             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
7907         nwritten 
= write(dfd
,buf
,nread
); 
7908         if (nwritten 
== -1) { 
7909             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
7917     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
7918         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
7924     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
7925         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
7929     server
.master 
= createClient(fd
); 
7930     server
.master
->flags 
|= REDIS_MASTER
; 
7931     server
.master
->authenticated 
= 1; 
7932     server
.replstate 
= REDIS_REPL_CONNECTED
; 
7936 static void slaveofCommand(redisClient 
*c
) { 
7937     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
7938         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
7939         if (server
.masterhost
) { 
7940             sdsfree(server
.masterhost
); 
7941             server
.masterhost 
= NULL
; 
7942             if (server
.master
) freeClient(server
.master
); 
7943             server
.replstate 
= REDIS_REPL_NONE
; 
7944             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
7947         sdsfree(server
.masterhost
); 
7948         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
7949         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
7950         if (server
.master
) freeClient(server
.master
); 
7951         server
.replstate 
= REDIS_REPL_CONNECT
; 
7952         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
7953             server
.masterhost
, server
.masterport
); 
7955     addReply(c
,shared
.ok
); 
7958 /* ============================ Maxmemory directive  ======================== */ 
7960 /* Try to free one object form the pre-allocated objects free list. 
7961  * This is useful under low mem conditions as by default we take 1 million 
7962  * free objects allocated. On success REDIS_OK is returned, otherwise 
7964 static int tryFreeOneObjectFromFreelist(void) { 
7967     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
7968     if (listLength(server
.objfreelist
)) { 
7969         listNode 
*head 
= listFirst(server
.objfreelist
); 
7970         o 
= listNodeValue(head
); 
7971         listDelNode(server
.objfreelist
,head
); 
7972         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7976         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
7981 /* This function gets called when 'maxmemory' is set on the config file to limit 
7982  * the max memory used by the server, and we are out of memory. 
7983  * This function will try to, in order: 
7985  * - Free objects from the free list 
7986  * - Try to remove keys with an EXPIRE set 
7988  * It is not possible to free enough memory to reach used-memory < maxmemory 
7989  * the server will start refusing commands that will enlarge even more the 
7992 static void freeMemoryIfNeeded(void) { 
7993     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
7994         int j
, k
, freed 
= 0; 
7996         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
7997         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7999             robj 
*minkey 
= NULL
; 
8000             struct dictEntry 
*de
; 
8002             if (dictSize(server
.db
[j
].expires
)) { 
8004                 /* From a sample of three keys drop the one nearest to 
8005                  * the natural expire */ 
8006                 for (k 
= 0; k 
< 3; k
++) { 
8009                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
8010                     t 
= (time_t) dictGetEntryVal(de
); 
8011                     if (minttl 
== -1 || t 
< minttl
) { 
8012                         minkey 
= dictGetEntryKey(de
); 
8016                 deleteKey(server
.db
+j
,minkey
); 
8019         if (!freed
) return; /* nothing to free... */ 
8023 /* ============================== Append Only file ========================== */ 
8025 /* Write the append only file buffer on disk. 
8027  * Since we are required to write the AOF before replying to the client, 
8028  * and the only way the client socket can get a write is entering when the 
8029  * the event loop, we accumulate all the AOF writes in a memory 
8030  * buffer and write it on disk using this function just before entering 
8031  * the event loop again. */ 
8032 static void flushAppendOnlyFile(void) { 
8036     if (sdslen(server
.aofbuf
) == 0) return; 
8038     /* We want to perform a single write. This should be guaranteed atomic 
8039      * at least if the filesystem we are writing is a real physical one. 
8040      * While this will save us against the server being killed I don't think 
8041      * there is much to do about the whole server stopping for power problems 
8043      nwritten 
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
)); 
8044      if (nwritten 
!= (signed)sdslen(server
.aofbuf
)) { 
8045         /* Ooops, we are in troubles. The best thing to do for now is 
8046          * aborting instead of giving the illusion that everything is 
8047          * working as expected. */ 
8048          if (nwritten 
== -1) { 
8049             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
8051             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
8055     sdsfree(server
.aofbuf
); 
8056     server
.aofbuf 
= sdsempty(); 
8058     /* Fsync if needed */ 
8060     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
8061         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
8062          now
-server
.lastfsync 
> 1)) 
8064         /* aof_fsync is defined as fdatasync() for Linux in order to avoid 
8065          * flushing metadata. */ 
8066         aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
8067         server
.lastfsync 
= now
; 
8071 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
8072     sds buf 
= sdsempty(); 
8076     /* The DB this command was targetting is not the same as the last command 
8077      * we appendend. To issue a SELECT command is needed. */ 
8078     if (dictid 
!= server
.appendseldb
) { 
8081         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
8082         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
8083             (unsigned long)strlen(seldb
),seldb
); 
8084         server
.appendseldb 
= dictid
; 
8087     /* "Fix" the argv vector if the command is EXPIRE. We want to translate 
8088      * EXPIREs into EXPIREATs calls */ 
8089     if (cmd
->proc 
== expireCommand
) { 
8092         tmpargv
[0] = createStringObject("EXPIREAT",8); 
8093         tmpargv
[1] = argv
[1]; 
8094         incrRefCount(argv
[1]); 
8095         when 
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10); 
8096         tmpargv
[2] = createObject(REDIS_STRING
, 
8097             sdscatprintf(sdsempty(),"%ld",when
)); 
8101     /* Append the actual command */ 
8102     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
8103     for (j 
= 0; j 
< argc
; j
++) { 
8106         o 
= getDecodedObject(o
); 
8107         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
8108         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
8109         buf 
= sdscatlen(buf
,"\r\n",2); 
8113     /* Free the objects from the modified argv for EXPIREAT */ 
8114     if (cmd
->proc 
== expireCommand
) { 
8115         for (j 
= 0; j 
< 3; j
++) 
8116             decrRefCount(argv
[j
]); 
8119     /* Append to the AOF buffer. This will be flushed on disk just before 
8120      * of re-entering the event loop, so before the client will get a 
8121      * positive reply about the operation performed. */ 
8122     server
.aofbuf 
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
)); 
8124     /* If a background append only file rewriting is in progress we want to 
8125      * accumulate the differences between the child DB and the current one 
8126      * in a buffer, so that when the child process will do its work we 
8127      * can append the differences to the new append only file. */ 
8128     if (server
.bgrewritechildpid 
!= -1) 
8129         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
8134 /* In Redis commands are always executed in the context of a client, so in 
8135  * order to load the append only file we need to create a fake client. */ 
8136 static struct redisClient 
*createFakeClient(void) { 
8137     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
8141     c
->querybuf 
= sdsempty(); 
8145     /* We set the fake client as a slave waiting for the synchronization 
8146      * so that Redis will not try to send replies to this client. */ 
8147     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8148     c
->reply 
= listCreate(); 
8149     listSetFreeMethod(c
->reply
,decrRefCount
); 
8150     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
8151     initClientMultiState(c
); 
8155 static void freeFakeClient(struct redisClient 
*c
) { 
8156     sdsfree(c
->querybuf
); 
8157     listRelease(c
->reply
); 
8158     freeClientMultiState(c
); 
8162 /* Replay the append log file. On error REDIS_OK is returned. On non fatal 
8163  * error (the append only file is zero-length) REDIS_ERR is returned. On 
8164  * fatal error an error message is logged and the program exists. */ 
8165 int loadAppendOnlyFile(char *filename
) { 
8166     struct redisClient 
*fakeClient
; 
8167     FILE *fp 
= fopen(filename
,"r"); 
8168     struct redis_stat sb
; 
8169     unsigned long long loadedkeys 
= 0; 
8170     int appendonly 
= server
.appendonly
; 
8172     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
8176         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
8180     /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI 
8181      * to the same file we're about to read. */ 
8182     server
.appendonly 
= 0; 
8184     fakeClient 
= createFakeClient(); 
8191         struct redisCommand 
*cmd
; 
8193         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
8199         if (buf
[0] != '*') goto fmterr
; 
8201         argv 
= zmalloc(sizeof(robj
*)*argc
); 
8202         for (j 
= 0; j 
< argc
; j
++) { 
8203             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
8204             if (buf
[0] != '$') goto fmterr
; 
8205             len 
= strtol(buf
+1,NULL
,10); 
8206             argsds 
= sdsnewlen(NULL
,len
); 
8207             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
8208             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
8209             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
8212         /* Command lookup */ 
8213         cmd 
= lookupCommand(argv
[0]->ptr
); 
8215             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
8218         /* Try object encoding */ 
8219         if (cmd
->flags 
& REDIS_CMD_BULK
) 
8220             argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]); 
8221         /* Run the command in the context of a fake client */ 
8222         fakeClient
->argc 
= argc
; 
8223         fakeClient
->argv 
= argv
; 
8224         cmd
->proc(fakeClient
); 
8225         /* Discard the reply objects list from the fake client */ 
8226         while(listLength(fakeClient
->reply
)) 
8227             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
8228         /* Clean up, ready for the next command */ 
8229         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
8231         /* Handle swapping while loading big datasets when VM is on */ 
8233         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
8234             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
8235                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
8240     /* This point can only be reached when EOF is reached without errors. 
8241      * If the client is in the middle of a MULTI/EXEC, log error and quit. */ 
8242     if (fakeClient
->flags 
& REDIS_MULTI
) goto readerr
; 
8245     freeFakeClient(fakeClient
); 
8246     server
.appendonly 
= appendonly
; 
8251         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
8253         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
8257     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
8261 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */ 
8262 static int fwriteBulkObject(FILE *fp
, robj 
*obj
) { 
8266     /* Avoid the incr/decr ref count business if possible to help 
8267      * copy-on-write (we are often in a child process when this function 
8269      * Also makes sure that key objects don't get incrRefCount-ed when VM 
8271     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
8272         obj 
= getDecodedObject(obj
); 
8275     snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
)); 
8276     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
; 
8277     if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0) 
8279     if (fwrite("\r\n",2,1,fp
) == 0) goto err
; 
8280     if (decrrc
) decrRefCount(obj
); 
8283     if (decrrc
) decrRefCount(obj
); 
/* Write the binary-safe string 's' of 'len' bytes to 'fp' in the bulk
 * format $<count>\r\n<payload>\r\n.
 *
 * Returns 1 on success, 0 on write error (never -1). */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: print it with %lu, like the other bulk writers. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() with a zero size returns 0, so skip empty payloads. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}

/* Write a double to 'fp' in the bulk format $<count>\r\n<payload>\r\n,
 * the payload being the value printed with "%.17g".
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload already carries its trailing CRLF, which is not part of
     * the announced count. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
        return 0;
    return 1;
}

/* Write a long to 'fp' in the bulk format $<count>\r\n<payload>\r\n,
 * the payload being the decimal representation of the value.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload already carries its trailing CRLF, which is not part of
     * the announced count. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
        return 0;
    return 1;
}

8321 /* Write a sequence of commands able to fully rebuild the dataset into 
8322  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
8323 static int rewriteAppendOnlyFile(char *filename
) { 
8324     dictIterator 
*di 
= NULL
; 
8329     time_t now 
= time(NULL
); 
8331     /* Note that we have to use a different temp name here compared to the 
8332      * one used by rewriteAppendOnlyFileBackground() function. */ 
8333     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
8334     fp 
= fopen(tmpfile
,"w"); 
8336         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
8339     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8340         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
8341         redisDb 
*db 
= server
.db
+j
; 
8343         if (dictSize(d
) == 0) continue; 
8344         di 
= dictGetIterator(d
); 
8350         /* SELECT the new DB */ 
8351         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
8352         if (fwriteBulkLong(fp
,j
) == 0) goto werr
; 
8354         /* Iterate this DB writing every entry */ 
8355         while((de 
= dictNext(di
)) != NULL
) { 
8360             key 
= dictGetEntryKey(de
); 
8361             /* If the value for this key is swapped, load a preview in memory. 
8362              * We use a "swapped" flag to remember if we need to free the 
8363              * value object instead to just increment the ref count anyway 
8364              * in order to avoid copy-on-write of pages if we are forked() */ 
8365             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
8366                 key
->storage 
== REDIS_VM_SWAPPING
) { 
8367                 o 
= dictGetEntryVal(de
); 
8370                 o 
= vmPreviewObject(key
); 
8373             expiretime 
= getExpire(db
,key
); 
8375             /* Save the key and associated value */ 
8376             if (o
->type 
== REDIS_STRING
) { 
8377                 /* Emit a SET command */ 
8378                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
8379                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8381                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8382                 if (fwriteBulkObject(fp
,o
) == 0) goto werr
; 
8383             } else if (o
->type 
== REDIS_LIST
) { 
8384                 /* Emit the RPUSHes needed to rebuild the list */ 
8385                 list 
*list 
= o
->ptr
; 
8389                 listRewind(list
,&li
); 
8390                 while((ln 
= listNext(&li
))) { 
8391                     char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
8392                     robj 
*eleobj 
= listNodeValue(ln
); 
8394                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8395                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8396                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
8398             } else if (o
->type 
== REDIS_SET
) { 
8399                 /* Emit the SADDs needed to rebuild the set */ 
8401                 dictIterator 
*di 
= dictGetIterator(set
); 
8404                 while((de 
= dictNext(di
)) != NULL
) { 
8405                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
8406                     robj 
*eleobj 
= dictGetEntryKey(de
); 
8408                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8409                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8410                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
8412                 dictReleaseIterator(di
); 
8413             } else if (o
->type 
== REDIS_ZSET
) { 
8414                 /* Emit the ZADDs needed to rebuild the sorted set */ 
8416                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
8419                 while((de 
= dictNext(di
)) != NULL
) { 
8420                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
8421                     robj 
*eleobj 
= dictGetEntryKey(de
); 
8422                     double *score 
= dictGetEntryVal(de
); 
8424                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8425                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8426                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
8427                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
8429                 dictReleaseIterator(di
); 
8430             } else if (o
->type 
== REDIS_HASH
) { 
8431                 char cmd
[]="*4\r\n$4\r\nHSET\r\n"; 
8433                 /* Emit the HSETs needed to rebuild the hash */ 
8434                 if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
8435                     unsigned char *p 
= zipmapRewind(o
->ptr
); 
8436                     unsigned char *field
, *val
; 
8437                     unsigned int flen
, vlen
; 
8439                     while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
8440                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8441                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8442                         if (fwriteBulkString(fp
,(char*)field
,flen
) == -1) 
8444                         if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1) 
8448                     dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
8451                     while((de 
= dictNext(di
)) != NULL
) { 
8452                         robj 
*field 
= dictGetEntryKey(de
); 
8453                         robj 
*val 
= dictGetEntryVal(de
); 
8455                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8456                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8457                         if (fwriteBulkObject(fp
,field
) == -1) return -1; 
8458                         if (fwriteBulkObject(fp
,val
) == -1) return -1; 
8460                     dictReleaseIterator(di
); 
8463                 redisPanic("Unknown object type"); 
8465             /* Save the expire time */ 
8466             if (expiretime 
!= -1) { 
8467                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
8468                 /* If this key is already expired skip it */ 
8469                 if (expiretime 
< now
) continue; 
8470                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8471                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8472                 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
; 
8474             if (swapped
) decrRefCount(o
); 
8476         dictReleaseIterator(di
); 
8479     /* Make sure data will not remain on the OS's output buffers */ 
8484     /* Use RENAME to make sure the DB file is changed atomically only 
8485      * if the generate DB file is ok. */ 
8486     if (rename(tmpfile
,filename
) == -1) { 
8487         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
8491     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
8497     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
8498     if (di
) dictReleaseIterator(di
); 
8502 /* This is how rewriting of the append only file in background works: 
8504  * 1) The user calls BGREWRITEAOF 
8505  * 2) Redis calls this function, that forks(): 
8506  *    2a) the child rewrite the append only file in a temp file. 
8507  *    2b) the parent accumulates differences in server.bgrewritebuf. 
8508  * 3) When the child finished '2a' exists. 
8509  * 4) The parent will trap the exit code, if it's OK, will append the 
8510  *    data accumulated into server.bgrewritebuf into the temp file, and 
8511  *    finally will rename(2) the temp file in the actual file name. 
8512  *    The the new file is reopened as the new append only file. Profit! 
8514 static int rewriteAppendOnlyFileBackground(void) { 
8517     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
8518     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
8519     if ((childpid 
= fork()) == 0) { 
8523         if (server
.vm_enabled
) vmReopenSwapFile(); 
8525         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
8526         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
8533         if (childpid 
== -1) { 
8534             redisLog(REDIS_WARNING
, 
8535                 "Can't rewrite append only file in background: fork: %s", 
8539         redisLog(REDIS_NOTICE
, 
8540             "Background append only file rewriting started by pid %d",childpid
); 
8541         server
.bgrewritechildpid 
= childpid
; 
8542         updateDictResizePolicy(); 
8543         /* We set appendseldb to -1 in order to force the next call to the 
8544          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
8545          * accumulated by the parent into server.bgrewritebuf will start 
8546          * with a SELECT statement and it will be safe to merge. */ 
8547         server
.appendseldb 
= -1; 
8550     return REDIS_OK
; /* unreached */ 
8553 static void bgrewriteaofCommand(redisClient 
*c
) { 
8554     if (server
.bgrewritechildpid 
!= -1) { 
8555         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
8558     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
8559         char *status 
= "+Background append only file rewriting started\r\n"; 
8560         addReplySds(c
,sdsnew(status
)); 
8562         addReply(c
,shared
.err
); 
8566 static void aofRemoveTempFile(pid_t childpid
) { 
8569     snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
); 
8573 /* Virtual Memory is composed mainly of two subsystems: 
8574  * - Blocking Virtual Memory
8575  * - Threaded Virtual Memory I/O 
8576  * The two parts are not fully decoupled, but functions are split among two 
8577  * different sections of the source code (delimited by comments) in order to 
8578  * make more clear what functionality is about the blocking VM and what about 
8579  * the threaded (not blocking) VM. 
8583  * Redis VM is a blocking VM (one that blocks reading swapped values from 
8584  * disk into memory when a value swapped out is needed in memory) that is made 
8585  * unblocking by trying to examine the command argument vector in order to 
8586  * load in background values that will likely be needed in order to exec 
8587  * the command. The command is executed only once all the relevant keys 
8588  * are loaded into memory. 
8590  * This basically is almost as simple as a blocking VM, but almost as parallel
8591  * as a fully non-blocking VM. 
8594 /* =================== Virtual Memory - Blocking Side  ====================== */ 
8596 static void vmInit(void) { 
8602     if (server
.vm_max_threads 
!= 0) 
8603         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
8605     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
8606     /* Try to open the old swap file, otherwise create it */ 
8607     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
8608         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
8610     if (server
.vm_fp 
== NULL
) { 
8611         redisLog(REDIS_WARNING
, 
8612             "Can't open the swap file: %s. Exiting.", 
8616     server
.vm_fd 
= fileno(server
.vm_fp
); 
8617     /* Lock the swap file for writing, this is useful in order to avoid 
8618      * another instance to use the same swap file for a config error. */ 
8619     fl
.l_type 
= F_WRLCK
; 
8620     fl
.l_whence 
= SEEK_SET
; 
8621     fl
.l_start 
= fl
.l_len 
= 0; 
8622     if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) { 
8623         redisLog(REDIS_WARNING
, 
8624             "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
)); 
8628     server
.vm_next_page 
= 0; 
8629     server
.vm_near_pages 
= 0; 
8630     server
.vm_stats_used_pages 
= 0; 
8631     server
.vm_stats_swapped_objects 
= 0; 
8632     server
.vm_stats_swapouts 
= 0; 
8633     server
.vm_stats_swapins 
= 0; 
8634     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
8635     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
8636     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
8637         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
8641         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
8643     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
8644     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
8645         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
8646     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
8648     /* Initialize threaded I/O (used by Virtual Memory) */ 
8649     server
.io_newjobs 
= listCreate(); 
8650     server
.io_processing 
= listCreate(); 
8651     server
.io_processed 
= listCreate(); 
8652     server
.io_ready_clients 
= listCreate(); 
8653     pthread_mutex_init(&server
.io_mutex
,NULL
); 
8654     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
8655     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
8656     server
.io_active_threads 
= 0; 
8657     if (pipe(pipefds
) == -1) { 
8658         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
8662     server
.io_ready_pipe_read 
= pipefds
[0]; 
8663     server
.io_ready_pipe_write 
= pipefds
[1]; 
8664     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
8665     /* LZF requires a lot of stack */ 
8666     pthread_attr_init(&server
.io_threads_attr
); 
8667     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
8668     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
8669     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
8670     /* Listen for events in the threaded I/O pipe */ 
8671     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
8672         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
8673         oom("creating file event"); 
8676 /* Mark the page as used */ 
8677 static void vmMarkPageUsed(off_t page
) { 
8678     off_t byte 
= page
/8; 
8680     redisAssert(vmFreePage(page
) == 1); 
8681     server
.vm_bitmap
[byte
] |= 1<<bit
; 
8684 /* Mark N contiguous pages as used, with 'page' being the first. */ 
8685 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
8688     for (j 
= 0; j 
< count
; j
++) 
8689         vmMarkPageUsed(page
+j
); 
8690     server
.vm_stats_used_pages 
+= count
; 
8691     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
8692         (long long)count
, (long long)page
); 
8695 /* Mark the page as free */ 
8696 static void vmMarkPageFree(off_t page
) { 
8697     off_t byte 
= page
/8; 
8699     redisAssert(vmFreePage(page
) == 0); 
8700     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
8703 /* Mark N contiguous pages as free, with 'page' being the first. */ 
8704 static void vmMarkPagesFree(off_t page
, off_t count
) { 
8707     for (j 
= 0; j 
< count
; j
++) 
8708         vmMarkPageFree(page
+j
); 
8709     server
.vm_stats_used_pages 
-= count
; 
8710     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
8711         (long long)count
, (long long)page
); 
8714 /* Test if the page is free */ 
8715 static int vmFreePage(off_t page
) { 
8716     off_t byte 
= page
/8; 
8718     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
8721 /* Find N contiguous free pages storing the first page of the cluster in *first. 
8722  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise 
8723  * REDIS_ERR is returned. 
8725  * This function uses a simple algorithm: we try to allocate 
8726  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
8727  * again from the start of the swap file searching for free spaces. 
8729  * If it looks pretty clear that there are no free pages near our offset 
8730  * we try to find less populated places doing a forward jump of 
8731  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
8732  * without hurry, and then we jump again and so forth... 
8734  * This function can be improved using a free list to avoid guessing
8735  * too much, since we could collect data about freed pages.
8737  * note: I implemented this function just after watching an episode of 
8738  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
8740 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
8741     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
8743     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
8744         server
.vm_near_pages 
= 0; 
8745         server
.vm_next_page 
= 0; 
8747     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
8748     base 
= server
.vm_next_page
; 
8750     while(offset 
< server
.vm_pages
) { 
8751         off_t 
this = base
+offset
; 
8753         /* If we overflow, restart from page zero */ 
8754         if (this >= server
.vm_pages
) { 
8755             this -= server
.vm_pages
; 
8757                 /* Just overflowed, what we found on tail is no longer 
8758                  * interesting, as it's no longer contiguous. */ 
8762         if (vmFreePage(this)) { 
8763             /* This is a free page */ 
8765             /* Already got N free pages? Return to the caller, with success */ 
8767                 *first 
= this-(n
-1); 
8768                 server
.vm_next_page 
= this+1; 
8769                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
8773             /* The current one is not a free page */ 
8777         /* Fast-forward if the current page is not free and we already 
8778          * searched enough near this place. */ 
8780         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
8781             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
8783             /* Note that even if we rewind after the jump, we don't need
8784              * to make sure numfree is set to zero as we only jump *if* it
8785              * is set to zero. */
8787             /* Otherwise just check the next page */ 
8794 /* Write the specified object at the specified page of the swap file */ 
8795 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
8796     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
8797     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
8798         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8799         redisLog(REDIS_WARNING
, 
8800             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
8804     rdbSaveObject(server
.vm_fp
,o
); 
8805     fflush(server
.vm_fp
); 
8806     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8810 /* Swap the 'val' object relative to 'key' into disk. Store all the information 
8811  * needed to later retrieve the object into the key object. 
8812  * If we can't find enough contiguous empty pages to swap the object on disk 
8813  * REDIS_ERR is returned. */ 
8814 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
) { 
8815     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
8818     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8819     assert(key
->refcount 
== 1); 
8820     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
; 
8821     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
; 
8822     key
->vm
.page 
= page
; 
8823     key
->vm
.usedpages 
= pages
; 
8824     key
->storage 
= REDIS_VM_SWAPPED
; 
8825     key
->vtype 
= val
->type
; 
8826     decrRefCount(val
); /* Deallocate the object from memory. */ 
8827     vmMarkPagesUsed(page
,pages
); 
8828     redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)", 
8829         (unsigned char*) key
->ptr
, 
8830         (unsigned long long) page
, (unsigned long long) pages
); 
8831     server
.vm_stats_swapped_objects
++; 
8832     server
.vm_stats_swapouts
++; 
8836 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
8839     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
8840     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
8841         redisLog(REDIS_WARNING
, 
8842             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
8846     o 
= rdbLoadObject(type
,server
.vm_fp
); 
8848         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
8851     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8855 /* Load the value object relative to the 'key' object from swap to memory. 
8856  * The newly allocated object is returned. 
8858  * If preview is true the unserialized object is returned to the caller but
8859  * no changes are made to the key object, nor are the pages marked as freed */
8860 static robj 
*vmGenericLoadObject(robj 
*key
, int preview
) { 
8863     redisAssert(key
->storage 
== REDIS_VM_SWAPPED 
|| key
->storage 
== REDIS_VM_LOADING
); 
8864     val 
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
); 
8866         key
->storage 
= REDIS_VM_MEMORY
; 
8867         key
->vm
.atime 
= server
.unixtime
; 
8868         vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
8869         redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk", 
8870             (unsigned char*) key
->ptr
); 
8871         server
.vm_stats_swapped_objects
--; 
8873         redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk", 
8874             (unsigned char*) key
->ptr
); 
8876     server
.vm_stats_swapins
++; 
8880 /* Plain object loading, from swap to memory */ 
8881 static robj 
*vmLoadObject(robj 
*key
) { 
8882     /* If we are loading the object in background, stop it, we 
8883      * need to load this object synchronously ASAP. */ 
8884     if (key
->storage 
== REDIS_VM_LOADING
) 
8885         vmCancelThreadedIOJob(key
); 
8886     return vmGenericLoadObject(key
,0); 
8889 /* Just load the value on disk, without to modify the key. 
8890  * This is useful when we want to perform some operation on the value 
8891  * without to really bring it from swap to memory, like while saving the 
8892  * dataset or rewriting the append only log. */ 
8893 static robj 
*vmPreviewObject(robj 
*key
) { 
8894     return vmGenericLoadObject(key
,1); 
8897 /* How good a candidate is this object for swapping?
8898  * The better candidate it is, the greater the returned value. 
8900  * Currently we try to perform a fast estimation of the object size in 
8901  * memory, and combine it with aging information.
8903  * Basically swappability = idle-time * log(estimated size) 
8905  * Bigger objects are preferred over smaller objects, but not 
8906  * proportionally, this is why we use the logarithm. This algorithm is 
8907  * just a first try and will probably be tuned later. */ 
8908 static double computeObjectSwappability(robj 
*o
) { 
8909     time_t age 
= server
.unixtime 
- o
->vm
.atime
; 
8913     struct dictEntry 
*de
; 
8916     if (age 
<= 0) return 0; 
8919         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
8922             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
8927         listNode 
*ln 
= listFirst(l
); 
8929         asize 
= sizeof(list
); 
8931             robj 
*ele 
= ln
->value
; 
8934             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8935                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8937             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
8942         z 
= (o
->type 
== REDIS_ZSET
); 
8943         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
8945         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
8946         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
8951             de 
= dictGetRandomKey(d
); 
8952             ele 
= dictGetEntryKey(de
); 
8953             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8954                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8956             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
8957             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
8961         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
8962             unsigned char *p 
= zipmapRewind((unsigned char*)o
->ptr
); 
8963             unsigned int len 
= zipmapLen((unsigned char*)o
->ptr
); 
8964             unsigned int klen
, vlen
; 
8965             unsigned char *key
, *val
; 
8967             if ((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) { 
8971             asize 
= len
*(klen
+vlen
+3); 
8972         } else if (o
->encoding 
== REDIS_ENCODING_HT
) { 
8974             asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
8979                 de 
= dictGetRandomKey(d
); 
8980                 ele 
= dictGetEntryKey(de
); 
8981                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8982                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8984                 ele 
= dictGetEntryVal(de
); 
8985                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
8986                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : 
8988                 asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
8993     return (double)age
*log(1+asize
); 
8996 /* Try to swap an object that's a good candidate for swapping. 
8997  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
8998  * to swap any object at all. 
9000  * If 'usethreads' is true, Redis will try to swap the object in background
9001  * using I/O threads. */ 
9002 static int vmSwapOneObject(int usethreads
) { 
9004     struct dictEntry 
*best 
= NULL
; 
9005     double best_swappability 
= 0; 
9006     redisDb 
*best_db 
= NULL
; 
9009     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
9010         redisDb 
*db 
= server
.db
+j
; 
9011         /* Why maxtries is set to 100? 
9012          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
9013          * are swappable objects */ 
9016         if (dictSize(db
->dict
) == 0) continue; 
9017         for (i 
= 0; i 
< 5; i
++) { 
9019             double swappability
; 
9021             if (maxtries
) maxtries
--; 
9022             de 
= dictGetRandomKey(db
->dict
); 
9023             key 
= dictGetEntryKey(de
); 
9024             val 
= dictGetEntryVal(de
); 
9025             /* Only swap objects that are currently in memory. 
9027              * Also don't swap shared objects if threaded VM is on, as we 
9028              * try to ensure that the main thread does not touch the 
9029              * object while the I/O thread is using it, but we can't 
9030              * control other keys without adding additional mutex. */ 
9031             if (key
->storage 
!= REDIS_VM_MEMORY 
|| 
9032                 (server
.vm_max_threads 
!= 0 && val
->refcount 
!= 1)) { 
9033                 if (maxtries
) i
--; /* don't count this try */ 
9036             swappability 
= computeObjectSwappability(val
); 
9037             if (!best 
|| swappability 
> best_swappability
) { 
9039                 best_swappability 
= swappability
; 
9044     if (best 
== NULL
) return REDIS_ERR
; 
9045     key 
= dictGetEntryKey(best
); 
9046     val 
= dictGetEntryVal(best
); 
9048     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
9049         key
->ptr
, best_swappability
); 
9051     /* Unshare the key if needed */ 
9052     if (key
->refcount 
> 1) { 
9053         robj 
*newkey 
= dupStringObject(key
); 
9055         key 
= dictGetEntryKey(best
) = newkey
; 
9059         vmSwapObjectThreaded(key
,val
,best_db
); 
9062         if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
9063             dictGetEntryVal(best
) = NULL
; 
/* Try to swap out one object without using the I/O threads.
 * Thin wrapper around vmSwapOneObject() with 'usethreads' set to 0; the
 * return value is passed through unchanged. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap out one object using the I/O threads.
 * Thin wrapper around vmSwapOneObject() with 'usethreads' set to 1; the
 * return value is passed through unchanged. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9079 /* Return true if it's safe to swap out objects in a given moment. 
9080  * Basically we don't want to swap objects out while there is a BGSAVE 
9081  * or a BGAEOREWRITE running in backgroud. */ 
9082 static int vmCanSwapOut(void) { 
9083     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
9086 /* Delete a key if swapped. Returns 1 if the key was found, was swapped 
9087  * and was deleted. Otherwise 0 is returned. */ 
9088 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
) { 
9092     if ((de 
= dictFind(db
->dict
,key
)) == NULL
) return 0; 
9093     foundkey 
= dictGetEntryKey(de
); 
9094     if (foundkey
->storage 
== REDIS_VM_MEMORY
) return 0; 
9099 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
9101 static void freeIOJob(iojob 
*j
) { 
9102     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
9103         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
9104         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
9105         decrRefCount(j
->val
); 
9106     /* We don't decrRefCount the j->key field as we didn't increment
9107      * the count when creating IO Jobs. This is because the key field here is
9108      * just used as an identifier and if a key is removed the Job should
9109      * never be touched again. */
9113 /* Every time a thread finished a Job, it writes a byte into the write side 
9114  * of an unix pipe in order to "awake" the main thread, and this function 
9116 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
9120     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
9122     REDIS_NOTUSED(mask
); 
9123     REDIS_NOTUSED(privdata
); 
9125     /* For every byte we read in the read side of the pipe, there is one 
9126      * I/O job completed to process. */ 
9127     while((retval 
= read(fd
,buf
,1)) == 1) { 
9131         struct dictEntry 
*de
; 
9133         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
9135         /* Get the processed element (the oldest one) */ 
9137         assert(listLength(server
.io_processed
) != 0); 
9138         if (toprocess 
== -1) { 
9139             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
9140             if (toprocess 
<= 0) toprocess 
= 1; 
9142         ln 
= listFirst(server
.io_processed
); 
9144         listDelNode(server
.io_processed
,ln
); 
9146         /* If this job is marked as canceled, just ignore it */ 
9151         /* Post process it in the main thread, as there are things we 
9152          * can do just here to avoid race conditions and/or invasive locks */ 
9153         redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
); 
9154         de 
= dictFind(j
->db
->dict
,j
->key
); 
9156         key 
= dictGetEntryKey(de
); 
9157         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
9160             /* Key loaded, bring it at home */ 
9161             key
->storage 
= REDIS_VM_MEMORY
; 
9162             key
->vm
.atime 
= server
.unixtime
; 
9163             vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
9164             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
9165                 (unsigned char*) key
->ptr
); 
9166             server
.vm_stats_swapped_objects
--; 
9167             server
.vm_stats_swapins
++; 
9168             dictGetEntryVal(de
) = j
->val
; 
9169             incrRefCount(j
->val
); 
9172             /* Handle clients waiting for this key to be loaded. */ 
9173             handleClientsBlockedOnSwappedKey(db
,key
); 
9174         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
9175             /* Now we know the amount of pages required to swap this object. 
9176              * Let's find some space for it, and queue this task again 
9177              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
9178             if (!vmCanSwapOut() || 
9179                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
9181                 /* Ooops... no space or we can't swap as there is 
9182                  * a fork()ed Redis trying to save stuff on disk. */ 
9184                 key
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
9186                 /* Note that we need to mark this pages as used now, 
9187                  * if the job will be canceled, we'll mark them as freed 
9189                 vmMarkPagesUsed(j
->page
,j
->pages
); 
9190                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
9195         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
9198             /* Key swapped. We can finally free some memory. */ 
9199             if (key
->storage 
!= REDIS_VM_SWAPPING
) { 
9200                 printf("key->storage: %d\n",key
->storage
); 
9201                 printf("key->name: %s\n",(char*)key
->ptr
); 
9202                 printf("key->refcount: %d\n",key
->refcount
); 
9203                 printf("val: %p\n",(void*)j
->val
); 
9204                 printf("val->type: %d\n",j
->val
->type
); 
9205                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
9207             redisAssert(key
->storage 
== REDIS_VM_SWAPPING
); 
9208             val 
= dictGetEntryVal(de
); 
9209             key
->vm
.page 
= j
->page
; 
9210             key
->vm
.usedpages 
= j
->pages
; 
9211             key
->storage 
= REDIS_VM_SWAPPED
; 
9212             key
->vtype 
= j
->val
->type
; 
9213             decrRefCount(val
); /* Deallocate the object from memory. */ 
9214             dictGetEntryVal(de
) = NULL
; 
9215             redisLog(REDIS_DEBUG
, 
9216                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
9217                 (unsigned char*) key
->ptr
, 
9218                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
9219             server
.vm_stats_swapped_objects
++; 
9220             server
.vm_stats_swapouts
++; 
9222             /* Put a few more swap requests in queue if we are still 
9224             if (trytoswap 
&& vmCanSwapOut() && 
9225                 zmalloc_used_memory() > server
.vm_max_memory
) 
9230                     more 
= listLength(server
.io_newjobs
) < 
9231                             (unsigned) server
.vm_max_threads
; 
9233                     /* Don't waste CPU time if swappable objects are rare. */ 
9234                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
9242         if (processed 
== toprocess
) return; 
9244     if (retval 
< 0 && errno 
!= EAGAIN
) { 
9245         redisLog(REDIS_WARNING
, 
9246             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
9251 static void lockThreadedIO(void) { 
9252     pthread_mutex_lock(&server
.io_mutex
); 
9255 static void unlockThreadedIO(void) { 
9256     pthread_mutex_unlock(&server
.io_mutex
); 
9259 /* Remove the specified object from the threaded I/O queue if still not 
9260  * processed, otherwise make sure to flag it as canceled. */ 
9261 static void vmCancelThreadedIOJob(robj 
*o
) { 
9263         server
.io_newjobs
,      /* 0 */ 
9264         server
.io_processing
,   /* 1 */ 
9265         server
.io_processed     
/* 2 */ 
9269     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
9272     /* Search for a matching key in one of the queues */ 
9273     for (i 
= 0; i 
< 3; i
++) { 
9277         listRewind(lists
[i
],&li
); 
9278         while ((ln 
= listNext(&li
)) != NULL
) { 
9279             iojob 
*job 
= ln
->value
; 
9281             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
9282             if (job
->key 
== o
) { 
9283                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n", 
9284                     (void*)job
, (char*)o
->ptr
, job
->type
, i
); 
9285                 /* Mark the pages as free since the swap didn't happen
9286                  * or happened but is now discarded. */
9287                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
9288                     vmMarkPagesFree(job
->page
,job
->pages
); 
9289                 /* Cancel the job. It depends on the list the job is 
9292                 case 0: /* io_newjobs */ 
9293                     /* If the job was not yet processed the best thing to do
9294                      * is to remove it from the queue altogether */
9296                     listDelNode(lists
[i
],ln
); 
9298                 case 1: /* io_processing */ 
9299                     /* Oh Shi- the thread is messing with the Job: 
9301                      * Probably it's accessing the object if this is a 
9302                      * PREPARE_SWAP or DO_SWAP job. 
9303                      * If it's a LOAD job it may be reading from disk and 
9304                      * if we don't wait for the job to terminate before to 
9305                      * cancel it, maybe in a few microseconds data can be 
9306                      * corrupted in this pages. So the short story is: 
9308                      * Better to wait for the job to move into the 
9309                      * next queue (processed)... */ 
9311                     /* We try again and again until the job is completed. */ 
9313                     /* But let's wait some time for the I/O thread 
9314                      * to finish with this job. After all this condition 
9315                      * should be very rare. */ 
9318                 case 2: /* io_processed */ 
9319                     /* The job was already processed, that's easy... 
9320                      * just mark it as canceled so that we'll ignore it 
9321                      * when processing completed jobs. */ 
9325                 /* Finally we have to adjust the storage type of the object
9326                  * in order to "UNDO" the operation. */
9327                 if (o
->storage 
== REDIS_VM_LOADING
) 
9328                     o
->storage 
= REDIS_VM_SWAPPED
; 
9329                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
9330                     o
->storage 
= REDIS_VM_MEMORY
; 
9337     assert(1 != 1); /* We should never reach this */ 
9340 static void *IOThreadEntryPoint(void *arg
) { 
9345     pthread_detach(pthread_self()); 
9347         /* Get a new job to process */ 
9349         if (listLength(server
.io_newjobs
) == 0) { 
9350             /* No new jobs in queue, exit. */ 
9351             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
9352                 (long) pthread_self()); 
9353             server
.io_active_threads
--; 
9357         ln 
= listFirst(server
.io_newjobs
); 
9359         listDelNode(server
.io_newjobs
,ln
); 
9360         /* Add the job in the processing queue */ 
9361         j
->thread 
= pthread_self(); 
9362         listAddNodeTail(server
.io_processing
,j
); 
9363         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
9365         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
9366             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
9368         /* Process the Job */ 
9369         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
9370             j
->val 
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
); 
9371         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
9372             FILE *fp 
= fopen("/dev/null","w+"); 
9373             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
9375         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
9376             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
9380         /* Done: insert the job into the processed queue */ 
9381         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
9382             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
9384         listDelNode(server
.io_processing
,ln
); 
9385         listAddNodeTail(server
.io_processed
,j
); 
9388         /* Signal the main thread there is new stuff to process */ 
9389         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
9391     return NULL
; /* never reached */ 
9394 static void spawnIOThread(void) { 
9396     sigset_t mask
, omask
; 
9400     sigaddset(&mask
,SIGCHLD
); 
9401     sigaddset(&mask
,SIGHUP
); 
9402     sigaddset(&mask
,SIGPIPE
); 
9403     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
9404     while ((err 
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) { 
9405         redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s", 
9409     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
9410     server
.io_active_threads
++; 
9413 /* We need to wait for the last thread to exit before we are able to 
9414  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
9415 static void waitEmptyIOJobsQueue(void) { 
9417         int io_processed_len
; 
9420         if (listLength(server
.io_newjobs
) == 0 && 
9421             listLength(server
.io_processing
) == 0 && 
9422             server
.io_active_threads 
== 0) 
9427         /* While waiting for empty jobs queue condition we post-process some 
9428          * finshed job, as I/O threads may be hanging trying to write against 
9429          * the io_ready_pipe_write FD but there are so much pending jobs that 
9431         io_processed_len 
= listLength(server
.io_processed
); 
9433         if (io_processed_len
) { 
9434             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
9435             usleep(1000); /* 1 millisecond */ 
9437             usleep(10000); /* 10 milliseconds */ 
9442 static void vmReopenSwapFile(void) { 
9443     /* Note: we don't close the old one as we are in the child process 
9444      * and don't want to mess at all with the original file object. */ 
9445     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
9446     if (server
.vm_fp 
== NULL
) { 
9447         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
9448             server
.vm_swap_file
); 
9451     server
.vm_fd 
= fileno(server
.vm_fp
); 
9454 /* This function must be called while with threaded IO locked */ 
9455 static void queueIOJob(iojob 
*j
) { 
9456     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
9457         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
9458     listAddNodeTail(server
.io_newjobs
,j
); 
9459     if (server
.io_active_threads 
< server
.vm_max_threads
) 
9463 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
9466     assert(key
->storage 
== REDIS_VM_MEMORY
); 
9467     assert(key
->refcount 
== 1); 
9469     j 
= zmalloc(sizeof(*j
)); 
9470     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
9476     j
->thread 
= (pthread_t
) -1; 
9477     key
->storage 
= REDIS_VM_SWAPPING
; 
9485 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
9487 /* This function makes the client 'c' wait for the key 'key' to be loaded. 
9488  * If there is not already a job loading the key, it is created. 
9489  * The key is added to the io_keys list in the client structure, and also 
9490  * in the hash table mapping swapped keys to waiting clients, that is, 
9491  * server.io_waited_keys. */ 
9492 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
9493     struct dictEntry 
*de
; 
9497     /* If the key does not exist or is already in RAM we don't need to 
9498      * block the client at all. */ 
9499     de 
= dictFind(c
->db
->dict
,key
); 
9500     if (de 
== NULL
) return 0; 
9501     o 
= dictGetEntryKey(de
); 
9502     if (o
->storage 
== REDIS_VM_MEMORY
) { 
9504     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
9505         /* We were swapping the key, undo it! */ 
9506         vmCancelThreadedIOJob(o
); 
9510     /* OK: the key is either swapped, or being loaded just now. */ 
9512     /* Add the key to the list of keys this client is waiting for. 
9513      * This maps clients to keys they are waiting for. */ 
9514     listAddNodeTail(c
->io_keys
,key
); 
9517     /* Add the client to the swapped keys => clients waiting map. */ 
9518     de 
= dictFind(c
->db
->io_keys
,key
); 
9522         /* For every key we take a list of clients blocked for it */ 
9524         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
9526         assert(retval 
== DICT_OK
); 
9528         l 
= dictGetEntryVal(de
); 
9530     listAddNodeTail(l
,c
); 
9532     /* Are we already loading the key from disk? If not create a job */ 
9533     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
9536         o
->storage 
= REDIS_VM_LOADING
; 
9537         j 
= zmalloc(sizeof(*j
)); 
9538         j
->type 
= REDIS_IOJOB_LOAD
; 
9541         j
->key
->vtype 
= o
->vtype
; 
9542         j
->page 
= o
->vm
.page
; 
9545         j
->thread 
= (pthread_t
) -1; 
9553 /* Preload keys for any command with first, last and step values for 
9554  * the command keys prototype, as defined in the command table. */ 
9555 static void waitForMultipleSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
9557     if (cmd
->vm_firstkey 
== 0) return; 
9558     last 
= cmd
->vm_lastkey
; 
9559     if (last 
< 0) last 
= argc
+last
; 
9560     for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) { 
9561         redisAssert(j 
< argc
); 
9562         waitForSwappedKey(c
,argv
[j
]); 
9566 /* Preload keys needed for the ZUNION and ZINTER commands. 
9567  * Note that the number of keys to preload is user-defined, so we need to 
9568  * apply a sanity check against argc. */ 
9569 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
9573     num 
= atoi(argv
[2]->ptr
); 
9574     if (num 
> (argc
-3)) return; 
9575     for (i 
= 0; i 
< num
; i
++) { 
9576         waitForSwappedKey(c
,argv
[3+i
]); 
9580 /* Preload keys needed to execute the entire MULTI/EXEC block. 
9582  * This function is called by blockClientOnSwappedKeys when EXEC is issued, 
9583  * and will block the client when any command requires a swapped out value. */ 
9584 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
9586     struct redisCommand 
*mcmd
; 
9589     REDIS_NOTUSED(argc
); 
9590     REDIS_NOTUSED(argv
); 
9592     if (!(c
->flags 
& REDIS_MULTI
)) return; 
9593     for (i 
= 0; i 
< c
->mstate
.count
; i
++) { 
9594         mcmd 
= c
->mstate
.commands
[i
].cmd
; 
9595         margc 
= c
->mstate
.commands
[i
].argc
; 
9596         margv 
= c
->mstate
.commands
[i
].argv
; 
9598         if (mcmd
->vm_preload_proc 
!= NULL
) { 
9599             mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
); 
9601             waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
); 
9606 /* Is this client attempting to run a command against swapped keys? 
9607  * If so, block it ASAP, load the keys in background, then resume it. 
9609  * The important idea about this function is that it can fail! If keys will 
9610  * still be swapped when the client is resumed, this key lookups will 
9611  * just block loading keys from disk. In practical terms this should only 
9612  * happen with SORT BY command or if there is a bug in this function. 
9614  * Return 1 if the client is marked as blocked, 0 if the client can 
9615  * continue as the keys it is going to access appear to be in memory. */ 
9616 static int blockClientOnSwappedKeys(struct redisCommand 
*cmd
, redisClient 
*c
) { 
9617     if (cmd
->vm_preload_proc 
!= NULL
) { 
9618         cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
); 
9620         waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
); 
9623     /* If the client was blocked for at least one key, mark it as blocked. */ 
9624     if (listLength(c
->io_keys
)) { 
9625         c
->flags 
|= REDIS_IO_WAIT
; 
9626         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
9627         server
.vm_blocked_clients
++; 
9634 /* Remove the 'key' from the list of blocked keys for a given client. 
9636  * The function returns 1 when there are no longer blocking keys after 
9637  * the current one was removed (and the client can be unblocked). */ 
9638 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
9642     struct dictEntry 
*de
; 
9644     /* Remove the key from the list of keys this client is waiting for. */ 
9645     listRewind(c
->io_keys
,&li
); 
9646     while ((ln 
= listNext(&li
)) != NULL
) { 
9647         if (compareStringObjects(ln
->value
,key
) == 0) { 
9648             listDelNode(c
->io_keys
,ln
); 
9654     /* Remove the client form the key => waiting clients map. */ 
9655     de 
= dictFind(c
->db
->io_keys
,key
); 
9657     l 
= dictGetEntryVal(de
); 
9658     ln 
= listSearchKey(l
,c
); 
9661     if (listLength(l
) == 0) 
9662         dictDelete(c
->db
->io_keys
,key
); 
9664     return listLength(c
->io_keys
) == 0; 
9667 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
9668     struct dictEntry 
*de
; 
9673     de 
= dictFind(db
->io_keys
,key
); 
9676     l 
= dictGetEntryVal(de
); 
9677     len 
= listLength(l
); 
9678     /* Note: we can't use something like while(listLength(l)) as the list 
9679      * can be freed by the calling function when we remove the last element. */ 
9682         redisClient 
*c 
= ln
->value
; 
9684         if (dontWaitForSwappedKey(c
,key
)) { 
9685             /* Put the client in the list of clients ready to go as we 
9686              * loaded all the keys about it. */ 
9687             listAddNodeTail(server
.io_ready_clients
,c
); 
9692 /* =========================== Remote Configuration ========================= */ 
9694 static void configSetCommand(redisClient 
*c
) { 
9695     robj 
*o 
= getDecodedObject(c
->argv
[3]); 
9696     if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) { 
9697         zfree(server
.dbfilename
); 
9698         server
.dbfilename 
= zstrdup(o
->ptr
); 
9699     } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) { 
9700         zfree(server
.requirepass
); 
9701         server
.requirepass 
= zstrdup(o
->ptr
); 
9702     } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) { 
9703         zfree(server
.masterauth
); 
9704         server
.masterauth 
= zstrdup(o
->ptr
); 
9705     } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) { 
9706         server
.maxmemory 
= strtoll(o
->ptr
, NULL
, 10); 
9708         addReplySds(c
,sdscatprintf(sdsempty(), 
9709             "-ERR not supported CONFIG parameter %s\r\n", 
9710             (char*)c
->argv
[2]->ptr
)); 
9715     addReply(c
,shared
.ok
); 
9718 static void configGetCommand(redisClient 
*c
) { 
9719     robj 
*o 
= getDecodedObject(c
->argv
[2]); 
9720     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
9721     char *pattern 
= o
->ptr
; 
9725     decrRefCount(lenobj
); 
9727     if (stringmatch(pattern
,"dbfilename",0)) { 
9728         addReplyBulkCString(c
,"dbfilename"); 
9729         addReplyBulkCString(c
,server
.dbfilename
); 
9732     if (stringmatch(pattern
,"requirepass",0)) { 
9733         addReplyBulkCString(c
,"requirepass"); 
9734         addReplyBulkCString(c
,server
.requirepass
); 
9737     if (stringmatch(pattern
,"masterauth",0)) { 
9738         addReplyBulkCString(c
,"masterauth"); 
9739         addReplyBulkCString(c
,server
.masterauth
); 
9742     if (stringmatch(pattern
,"maxmemory",0)) { 
9745         snprintf(buf
,128,"%llu\n",server
.maxmemory
); 
9746         addReplyBulkCString(c
,"maxmemory"); 
9747         addReplyBulkCString(c
,buf
); 
9751     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2); 
9754 static void configCommand(redisClient 
*c
) { 
9755     if (!strcasecmp(c
->argv
[1]->ptr
,"set")) { 
9756         if (c
->argc 
!= 4) goto badarity
; 
9757         configSetCommand(c
); 
9758     } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) { 
9759         if (c
->argc 
!= 3) goto badarity
; 
9760         configGetCommand(c
); 
9761     } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) { 
9762         if (c
->argc 
!= 2) goto badarity
; 
9763         server
.stat_numcommands 
= 0; 
9764         server
.stat_numconnections 
= 0; 
9765         server
.stat_expiredkeys 
= 0; 
9766         server
.stat_starttime 
= time(NULL
); 
9767         addReply(c
,shared
.ok
); 
9769         addReplySds(c
,sdscatprintf(sdsempty(), 
9770             "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n")); 
9775     addReplySds(c
,sdscatprintf(sdsempty(), 
9776         "-ERR Wrong number of arguments for CONFIG %s\r\n", 
9777         (char*) c
->argv
[1]->ptr
)); 
9780 /* =========================== Pubsub implementation ======================== */ 
9782 static void freePubsubPattern(void *p
) { 
9783     pubsubPattern 
*pat 
= p
; 
9785     decrRefCount(pat
->pattern
); 
9789 static int listMatchPubsubPattern(void *a
, void *b
) { 
9790     pubsubPattern 
*pa 
= a
, *pb 
= b
; 
9792     return (pa
->client 
== pb
->client
) && 
9793            (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0); 
9796 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or 
9797  * 0 if the client was already subscribed to that channel. */ 
9798 static int pubsubSubscribeChannel(redisClient 
*c
, robj 
*channel
) { 
9799     struct dictEntry 
*de
; 
9800     list 
*clients 
= NULL
; 
9803     /* Add the channel to the client -> channels hash table */ 
9804     if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) { 
9806         incrRefCount(channel
); 
9807         /* Add the client to the channel -> list of clients hash table */ 
9808         de 
= dictFind(server
.pubsub_channels
,channel
); 
9810             clients 
= listCreate(); 
9811             dictAdd(server
.pubsub_channels
,channel
,clients
); 
9812             incrRefCount(channel
); 
9814             clients 
= dictGetEntryVal(de
); 
9816         listAddNodeTail(clients
,c
); 
9818     /* Notify the client */ 
9819     addReply(c
,shared
.mbulk3
); 
9820     addReply(c
,shared
.subscribebulk
); 
9821     addReplyBulk(c
,channel
); 
9822     addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
9826 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
9827  * 0 if the client was not subscribed to the specified channel. */ 
9828 static int pubsubUnsubscribeChannel(redisClient 
*c
, robj 
*channel
, int notify
) { 
9829     struct dictEntry 
*de
; 
9834     /* Remove the channel from the client -> channels hash table */ 
9835     incrRefCount(channel
); /* channel may be just a pointer to the same object 
9836                             we have in the hash tables. Protect it... */ 
9837     if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) { 
9839         /* Remove the client from the channel -> clients list hash table */ 
9840         de 
= dictFind(server
.pubsub_channels
,channel
); 
9842         clients 
= dictGetEntryVal(de
); 
9843         ln 
= listSearchKey(clients
,c
); 
9845         listDelNode(clients
,ln
); 
9846         if (listLength(clients
) == 0) { 
9847             /* Free the list and associated hash entry at all if this was 
9848              * the latest client, so that it will be possible to abuse 
9849              * Redis PUBSUB creating millions of channels. */ 
9850             dictDelete(server
.pubsub_channels
,channel
); 
9853     /* Notify the client */ 
9855         addReply(c
,shared
.mbulk3
); 
9856         addReply(c
,shared
.unsubscribebulk
); 
9857         addReplyBulk(c
,channel
); 
9858         addReplyLong(c
,dictSize(c
->pubsub_channels
)+ 
9859                        listLength(c
->pubsub_patterns
)); 
9862     decrRefCount(channel
); /* it is finally safe to release it */ 
9866 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */ 
9867 static int pubsubSubscribePattern(redisClient 
*c
, robj 
*pattern
) { 
9870     if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) { 
9873         listAddNodeTail(c
->pubsub_patterns
,pattern
); 
9874         incrRefCount(pattern
); 
9875         pat 
= zmalloc(sizeof(*pat
)); 
9876         pat
->pattern 
= getDecodedObject(pattern
); 
9878         listAddNodeTail(server
.pubsub_patterns
,pat
); 
9880     /* Notify the client */ 
9881     addReply(c
,shared
.mbulk3
); 
9882     addReply(c
,shared
.psubscribebulk
); 
9883     addReplyBulk(c
,pattern
); 
9884     addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
9888 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
9889  * 0 if the client was not subscribed to the specified channel. */ 
9890 static int pubsubUnsubscribePattern(redisClient 
*c
, robj 
*pattern
, int notify
) { 
9895     incrRefCount(pattern
); /* Protect the object. May be the same we remove */ 
9896     if ((ln 
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) { 
9898         listDelNode(c
->pubsub_patterns
,ln
); 
9900         pat
.pattern 
= pattern
; 
9901         ln 
= listSearchKey(server
.pubsub_patterns
,&pat
); 
9902         listDelNode(server
.pubsub_patterns
,ln
); 
9904     /* Notify the client */ 
9906         addReply(c
,shared
.mbulk3
); 
9907         addReply(c
,shared
.punsubscribebulk
); 
9908         addReplyBulk(c
,pattern
); 
9909         addReplyLong(c
,dictSize(c
->pubsub_channels
)+ 
9910                        listLength(c
->pubsub_patterns
)); 
9912     decrRefCount(pattern
); 
9916 /* Unsubscribe from all the channels. Return the number of channels the 
9917  * client was subscribed from. */ 
9918 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
) { 
9919     dictIterator 
*di 
= dictGetIterator(c
->pubsub_channels
); 
9923     while((de 
= dictNext(di
)) != NULL
) { 
9924         robj 
*channel 
= dictGetEntryKey(de
); 
9926         count 
+= pubsubUnsubscribeChannel(c
,channel
,notify
); 
9928     dictReleaseIterator(di
); 
9932 /* Unsubscribe from all the patterns. Return the number of patterns the 
9933  * client was subscribed from. */ 
9934 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
) { 
9939     listRewind(c
->pubsub_patterns
,&li
); 
9940     while ((ln 
= listNext(&li
)) != NULL
) { 
9941         robj 
*pattern 
= ln
->value
; 
9943         count 
+= pubsubUnsubscribePattern(c
,pattern
,notify
); 
9948 /* Publish a message */ 
9949 static int pubsubPublishMessage(robj 
*channel
, robj 
*message
) { 
9951     struct dictEntry 
*de
; 
9955     /* Send to clients listening for that channel */ 
9956     de 
= dictFind(server
.pubsub_channels
,channel
); 
9958         list 
*list 
= dictGetEntryVal(de
); 
9962         listRewind(list
,&li
); 
9963         while ((ln 
= listNext(&li
)) != NULL
) { 
9964             redisClient 
*c 
= ln
->value
; 
9966             addReply(c
,shared
.mbulk3
); 
9967             addReply(c
,shared
.messagebulk
); 
9968             addReplyBulk(c
,channel
); 
9969             addReplyBulk(c
,message
); 
9973     /* Send to clients listening to matching channels */ 
9974     if (listLength(server
.pubsub_patterns
)) { 
9975         listRewind(server
.pubsub_patterns
,&li
); 
9976         channel 
= getDecodedObject(channel
); 
9977         while ((ln 
= listNext(&li
)) != NULL
) { 
9978             pubsubPattern 
*pat 
= ln
->value
; 
9980             if (stringmatchlen((char*)pat
->pattern
->ptr
, 
9981                                 sdslen(pat
->pattern
->ptr
), 
9982                                 (char*)channel
->ptr
, 
9983                                 sdslen(channel
->ptr
),0)) { 
9984                 addReply(pat
->client
,shared
.mbulk4
); 
9985                 addReply(pat
->client
,shared
.pmessagebulk
); 
9986                 addReplyBulk(pat
->client
,pat
->pattern
); 
9987                 addReplyBulk(pat
->client
,channel
); 
9988                 addReplyBulk(pat
->client
,message
); 
9992         decrRefCount(channel
); 
9997 static void subscribeCommand(redisClient 
*c
) { 
10000     for (j 
= 1; j 
< c
->argc
; j
++) 
10001         pubsubSubscribeChannel(c
,c
->argv
[j
]); 
10004 static void unsubscribeCommand(redisClient 
*c
) { 
10005     if (c
->argc 
== 1) { 
10006         pubsubUnsubscribeAllChannels(c
,1); 
10011         for (j 
= 1; j 
< c
->argc
; j
++) 
10012             pubsubUnsubscribeChannel(c
,c
->argv
[j
],1); 
10016 static void psubscribeCommand(redisClient 
*c
) { 
10019     for (j 
= 1; j 
< c
->argc
; j
++) 
10020         pubsubSubscribePattern(c
,c
->argv
[j
]); 
10023 static void punsubscribeCommand(redisClient 
*c
) { 
10024     if (c
->argc 
== 1) { 
10025         pubsubUnsubscribeAllPatterns(c
,1); 
10030         for (j 
= 1; j 
< c
->argc
; j
++) 
10031             pubsubUnsubscribePattern(c
,c
->argv
[j
],1); 
10035 static void publishCommand(redisClient 
*c
) { 
10036     int receivers 
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]); 
10037     addReplyLong(c
,receivers
); 
10040 /* ================================= Debugging ============================== */ 
10042 static void debugCommand(redisClient 
*c
) { 
10043     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
10044         *((char*)-1) = 'x'; 
10045     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
10046         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
10047             addReply(c
,shared
.err
); 
10051         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
10052             addReply(c
,shared
.err
); 
10055         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
10056         addReply(c
,shared
.ok
); 
10057     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
10059         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
10060             addReply(c
,shared
.err
); 
10063         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
10064         addReply(c
,shared
.ok
); 
10065     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
10066         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
10070             addReply(c
,shared
.nokeyerr
); 
10073         key 
= dictGetEntryKey(de
); 
10074         val 
= dictGetEntryVal(de
); 
10075         if (!server
.vm_enabled 
|| (key
->storage 
== REDIS_VM_MEMORY 
|| 
10076                                    key
->storage 
== REDIS_VM_SWAPPING
)) { 
10080             if (val
->encoding 
< (sizeof(strencoding
)/sizeof(char*))) { 
10081                 strenc 
= strencoding
[val
->encoding
]; 
10083                 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
); 
10086             addReplySds(c
,sdscatprintf(sdsempty(), 
10087                 "+Key at:%p refcount:%d, value at:%p refcount:%d " 
10088                 "encoding:%s serializedlength:%lld\r\n", 
10089                 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
, 
10090                 strenc
, (long long) rdbSavedObjectLen(val
,NULL
))); 
10092             addReplySds(c
,sdscatprintf(sdsempty(), 
10093                 "+Key at:%p refcount:%d, value swapped at: page %llu " 
10094                 "using %llu pages\r\n", 
10095                 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
, 
10096                 (unsigned long long) key
->vm
.usedpages
)); 
10098     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc 
== 3) { 
10099         lookupKeyRead(c
->db
,c
->argv
[2]); 
10100         addReply(c
,shared
.ok
); 
10101     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
10102         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
10105         if (!server
.vm_enabled
) { 
10106             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
10110             addReply(c
,shared
.nokeyerr
); 
10113         key 
= dictGetEntryKey(de
); 
10114         val 
= dictGetEntryVal(de
); 
10115         /* If the key is shared we want to create a copy */ 
10116         if (key
->refcount 
> 1) { 
10117             robj 
*newkey 
= dupStringObject(key
); 
10119             key 
= dictGetEntryKey(de
) = newkey
; 
10122         if (key
->storage 
!= REDIS_VM_MEMORY
) { 
10123             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
10124         } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
10125             dictGetEntryVal(de
) = NULL
; 
10126             addReply(c
,shared
.ok
); 
10128             addReply(c
,shared
.err
); 
10131         addReplySds(c
,sdsnew( 
10132             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n")); 
10136 static void _redisAssert(char *estr
, char *file
, int line
) { 
10137     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
10138     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
); 
10139 #ifdef HAVE_BACKTRACE 
10140     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
10141     *((char*)-1) = 'x'; 
10145 static void _redisPanic(char *msg
, char *file
, int line
) { 
10146     redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue"); 
10147     redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
); 
10148 #ifdef HAVE_BACKTRACE 
10149     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
10150     *((char*)-1) = 'x'; 
10154 /* =================================== Main! ================================ */ 
10157 int linuxOvercommitMemoryValue(void) { 
10158     FILE *fp 
= fopen("/proc/sys/vm/overcommit_memory","r"); 
10161     if (!fp
) return -1; 
10162     if (fgets(buf
,64,fp
) == NULL
) { 
10171 void linuxOvercommitMemoryWarning(void) { 
10172     if (linuxOvercommitMemoryValue() == 0) { 
10173         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
10176 #endif /* __linux__ */ 
10178 static void daemonize(void) { 
10182     if (fork() != 0) exit(0); /* parent exits */ 
10183     setsid(); /* create a new session */ 
10185     /* Every output goes to /dev/null. If Redis is daemonized but 
10186      * the 'logfile' is set to 'stdout' in the configuration file 
10187      * it will not log at all. */ 
10188     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
10189         dup2(fd
, STDIN_FILENO
); 
10190         dup2(fd
, STDOUT_FILENO
); 
10191         dup2(fd
, STDERR_FILENO
); 
10192         if (fd 
> STDERR_FILENO
) close(fd
); 
10194     /* Try to write the pid file */ 
10195     fp 
= fopen(server
.pidfile
,"w"); 
10197         fprintf(fp
,"%d\n",getpid()); 
10202 static void version() { 
10203     printf("Redis server version %s\n", REDIS_VERSION
); 
10207 static void usage() { 
10208     fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n"); 
10209     fprintf(stderr
,"       ./redis-server - (read config from stdin)\n"); 
10213 int main(int argc
, char **argv
) { 
10216     initServerConfig(); 
10218         if (strcmp(argv
[1], "-v") == 0 || 
10219             strcmp(argv
[1], "--version") == 0) version(); 
10220         if (strcmp(argv
[1], "--help") == 0) usage(); 
10221         resetServerSaveParams(); 
10222         loadServerConfig(argv
[1]); 
10223     } else if ((argc 
> 2)) { 
10226         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
10228     if (server
.daemonize
) daemonize(); 
10230     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
10232     linuxOvercommitMemoryWarning(); 
10234     start 
= time(NULL
); 
10235     if (server
.appendonly
) { 
10236         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
10237             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
10239         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
10240             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
10242     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
10243     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
10245     aeDeleteEventLoop(server
.el
); 
10249 /* ============================= Backtrace support ========================= */ 
10251 #ifdef HAVE_BACKTRACE 
10252 static char *findFuncName(void *pointer
, unsigned long *offset
); 
10254 static void *getMcontextEip(ucontext_t 
*uc
) { 
10255 #if defined(__FreeBSD__) 
10256     return (void*) uc
->uc_mcontext
.mc_eip
; 
10257 #elif defined(__dietlibc__) 
10258     return (void*) uc
->uc_mcontext
.eip
; 
10259 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
10261     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
10263     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
10265 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
10266   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
10267     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
10269     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
10271 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
10272     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
10273 #elif defined(__ia64__) /* Linux IA64 */ 
10274     return (void*) uc
->uc_mcontext
.sc_ip
; 
10280 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
10282     char **messages 
= NULL
; 
10283     int i
, trace_size 
= 0; 
10284     unsigned long offset
=0; 
10285     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
10287     REDIS_NOTUSED(info
); 
10289     redisLog(REDIS_WARNING
, 
10290         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
10291     infostring 
= genRedisInfoString(); 
10292     redisLog(REDIS_WARNING
, "%s",infostring
); 
10293     /* It's not safe to sdsfree() the returned string under memory 
10294      * corruption conditions. Let it leak as we are going to abort */ 
10296     trace_size 
= backtrace(trace
, 100); 
10297     /* overwrite sigaction with caller's address */ 
10298     if (getMcontextEip(uc
) != NULL
) { 
10299         trace
[1] = getMcontextEip(uc
); 
10301     messages 
= backtrace_symbols(trace
, trace_size
); 
10303     for (i
=1; i
<trace_size
; ++i
) { 
10304         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
10306         p 
= strchr(messages
[i
],'+'); 
10307         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
10308             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
10310             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
10313     /* free(messages); Don't call free() with possibly corrupted memory. */ 
10317 static void setupSigSegvAction(void) { 
10318     struct sigaction act
; 
10320     sigemptyset (&act
.sa_mask
); 
10321     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
10322      * is used. Otherwise, sa_handler is used */ 
10323     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
10324     act
.sa_sigaction 
= segvHandler
; 
10325     sigaction (SIGSEGV
, &act
, NULL
); 
10326     sigaction (SIGBUS
, &act
, NULL
); 
10327     sigaction (SIGFPE
, &act
, NULL
); 
10328     sigaction (SIGILL
, &act
, NULL
); 
10329     sigaction (SIGBUS
, &act
, NULL
); 
10333 #include "staticsymbols.h" 
10334 /* This function tries to convert a pointer into a function name. It's used in 
10335  * order to provide a backtrace under segmentation fault that's able to 
10336  * display functions declared as static (otherwise the backtrace is useless). */ 
10337 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
10339     unsigned long off
, minoff 
= 0; 
10341     /* Try to match against the Symbol with the smallest offset */ 
10342     for (i
=0; symsTable
[i
].pointer
; i
++) { 
10343         unsigned long lp 
= (unsigned long) pointer
; 
10345         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
10346             off
=lp
-symsTable
[i
].pointer
; 
10347             if (ret 
< 0 || off 
< minoff
) { 
10353     if (ret 
== -1) return NULL
; 
10355     return symsTable
[ret
].name
; 
10357 #else /* HAVE_BACKTRACE */ 
10358 static void setupSigSegvAction(void) { 
10360 #endif /* HAVE_BACKTRACE */