2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "1.3.12" 
  45 #endif /* HAVE_BACKTRACE */ 
  53 #include <arpa/inet.h> 
  57 #include <sys/resource.h> 
  65 #include "solarisfixes.h" 
  69 #include "ae.h"     /* Event driven programming library */ 
  70 #include "sds.h"    /* Dynamic safe strings */ 
  71 #include "anet.h"   /* Networking the easy way */ 
  72 #include "dict.h"   /* Hash tables */ 
  73 #include "adlist.h" /* Linked lists */ 
  74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  75 #include "lzf.h"    /* LZF compression library */ 
  76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  77 #include "zipmap.h" /* Compact dictionary-alike data structure */ 
  78 #include "sha1.h"   /* SHA1 is used for DEBUG DIGEST */ 
  79 #include "release.h" /* Release and/or git repository information */ 
  85 /* Static server configuration */ 
  86 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  87 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  88 #define REDIS_IOBUF_LEN         1024 
  89 #define REDIS_LOADBUF_LEN       1024 
  90 #define REDIS_STATIC_ARGS       8 
  91 #define REDIS_DEFAULT_DBNUM     16 
  92 #define REDIS_CONFIGLINE_MAX    1024 
  93 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  94 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  95 #define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* lookup 10 expires per loop */ 
  96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
  99 /* If more then REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
 100 #define REDIS_WRITEV_THRESHOLD      3 
 101 /* Max number of iovecs used for each writev call */ 
 102 #define REDIS_WRITEV_IOVEC_COUNT    256 
 104 /* Hash table parameters */ 
 105 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 108 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 109 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 110 /* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with 
 111    this flags will return an error when the 'maxmemory' option is set in the 
 112    config file and the server is using more than maxmemory bytes of memory. 
 113    In short this commands are denied on low memory conditions. */ 
 114 #define REDIS_CMD_DENYOOM       4 
 115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ 
 118 #define REDIS_STRING 0 
 124 /* Objects encoding. Some kind of objects like Strings and Hashes can be 
 125  * internally represented in multiple ways. The 'encoding' field of the object 
 126  * is set to one of this fields for this object. */ 
 127 #define REDIS_ENCODING_RAW 0    /* Raw representation */ 
 128 #define REDIS_ENCODING_INT 1    /* Encoded as integer */ 
 129 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */ 
 130 #define REDIS_ENCODING_HT 3     /* Encoded as an hash table */ 
 132 static char* strencoding
[] = { 
 133     "raw", "int", "zipmap", "hashtable" 
 136 /* Object types only used for dumping to disk */ 
 137 #define REDIS_EXPIRETIME 253 
 138 #define REDIS_SELECTDB 254 
 139 #define REDIS_EOF 255 
 141 /* Defines related to the dump file format. To store 32 bits lengths for short 
 142  * keys requires a lot of space, so we check the most significant 2 bits of 
 143  * the first byte to interpreter the length: 
 145  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 146  * 01|000000 00000000 =>  01, the len is 14 byes, 6 bits + 8 bits of next byte 
 147  * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow 
 148  * 11|000000 this means: specially encoded object will follow. The six bits 
 149  *           number specify the kind of object that follows. 
 150  *           See the REDIS_RDB_ENC_* defines. 
 152  * Lenghts up to 63 are stored using a single byte, most DB keys, and may 
 153  * values, will fit inside. */ 
 154 #define REDIS_RDB_6BITLEN 0 
 155 #define REDIS_RDB_14BITLEN 1 
 156 #define REDIS_RDB_32BITLEN 2 
 157 #define REDIS_RDB_ENCVAL 3 
 158 #define REDIS_RDB_LENERR UINT_MAX 
 160 /* When a length of a string object stored on disk has the first two bits 
 161  * set, the remaining two bits specify a special encoding for the object 
 162  * accordingly to the following defines: */ 
 163 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 164 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 165 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 166 #define REDIS_RDB_ENC_LZF 3         /* string compressed with FASTLZ */ 
 168 /* Virtual memory object->where field. */ 
 169 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 170 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 171 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 172 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 174 /* Virtual memory static configuration stuff. 
 175  * Check vmFindContiguousPages() to know more about this magic numbers. */ 
 176 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 177 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 178 #define REDIS_VM_MAX_THREADS 32 
 179 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 180 /* The following is the *percentage* of completed I/O jobs to process when the 
 181  * handelr is called. While Virtual Memory I/O operations are performed by 
 182  * threads, this operations must be processed by the main thread when completed 
 183  * in order to take effect. */ 
 184 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 187 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 188 #define REDIS_MASTER 2      /* This client is a master server */ 
 189 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 190 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 191 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 192 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 194 /* Slave replication state - slave side */ 
 195 #define REDIS_REPL_NONE 0   /* No active replication */ 
 196 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 197 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 199 /* Slave replication state - from the point of view of master 
 200  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 201  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 202  * to start the next background saving in order to send updates to it. */ 
 203 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 204 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 205 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 206 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 208 /* List related stuff */ 
 212 /* Sort operations */ 
 213 #define REDIS_SORT_GET 0 
 214 #define REDIS_SORT_ASC 1 
 215 #define REDIS_SORT_DESC 2 
 216 #define REDIS_SORTKEY_MAX 1024 
 219 #define REDIS_DEBUG 0 
 220 #define REDIS_VERBOSE 1 
 221 #define REDIS_NOTICE 2 
 222 #define REDIS_WARNING 3 
 224 /* Anti-warning macro... */ 
 225 #define REDIS_NOTUSED(V) ((void) V) 
 227 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 228 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 230 /* Append only defines */ 
 231 #define APPENDFSYNC_NO 0 
 232 #define APPENDFSYNC_ALWAYS 1 
 233 #define APPENDFSYNC_EVERYSEC 2 
 235 /* Hashes related defaults */ 
 236 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 237 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 239 /* We can print the stacktrace, so our assert is defined this way: */ 
 240 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
 241 #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) 
 242 static void _redisAssert(char *estr
, char *file
, int line
); 
 243 static void _redisPanic(char *msg
, char *file
, int line
); 
 245 /*================================= Data types ============================== */ 
 247 /* A redis object, that is a type able to hold a string / list / set */ 
 249 /* The VM object structure */ 
 250 struct redisObjectVM 
{ 
 251     off_t page
;         /* the page at witch the object is stored on disk */ 
 252     off_t usedpages
;    /* number of pages used on disk */ 
 253     time_t atime
;       /* Last access time */ 
 256 /* The actual Redis Object */ 
 257 typedef struct redisObject 
{ 
 260     unsigned char encoding
; 
 261     unsigned char storage
;  /* If this object is a key, where is the value? 
 262                              * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */ 
 263     unsigned char vtype
; /* If this object is a key, and value is swapped out, 
 264                           * this is the type of the swapped out object. */ 
 266     /* VM fields: these are only allocated if VM is active, otherwise the 
 267      * object allocation function will just allocate 
 268      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 269      * Redis without VM active will not have any overhead. */ 
 270     struct redisObjectVM vm
; 
 273 /* Macro used to initialize a Redis object allocated on the stack. 
 274  * Note that this macro is kept near the structure definition to make sure 
 275  * we'll update it when the structure is changed, to avoid bugs like 
 276  * bug #85, which was introduced exactly in this way. */ 
 277 #define initStaticStringObject(_var,_ptr) do { \ 
 279     _var.type = REDIS_STRING; \ 
 280     _var.encoding = REDIS_ENCODING_RAW; \ 
 282     if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \ 
 285 typedef struct redisDb 
{ 
 286     dict 
*dict
;                 /* The keyspace for this DB */ 
 287     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 288     dict 
*blockingkeys
;         /* Keys with clients waiting for data (BLPOP) */ 
 289     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 293 /* Client MULTI/EXEC state */ 
 294 typedef struct multiCmd 
{ 
 297     struct redisCommand 
*cmd
; 
 300 typedef struct multiState 
{ 
 301     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 302     int count
;              /* Total number of MULTI commands */ 
 305 /* With multiplexing we need to keep per-client state. 
 306  * Clients are kept in a linked list. */ 
 307 typedef struct redisClient 
{ 
 312     robj 
**argv
, **mbargv
; 
 314     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 315     int multibulk
;          /* multi bulk command format active */ 
 318     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 319     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 320     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 321     int authenticated
;      /* when requirepass is non-NULL */ 
 322     int replstate
;          /* replication state if this is a slave */ 
 323     int repldbfd
;           /* replication DB file descriptor */ 
 324     long repldboff
;         /* replication DB file offset */ 
 325     off_t repldbsize
;       /* replication DB file size */ 
 326     multiState mstate
;      /* MULTI/EXEC state */ 
 327     robj 
**blockingkeys
;    /* The key we are waiting to terminate a blocking 
 328                              * operation such as BLPOP. Otherwise NULL. */ 
 329     int blockingkeysnum
;    /* Number of blocking keys */ 
 330     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 331                              * is >= blockingto then the operation timed out. */ 
 332     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 333                              * swap file in order to continue. */ 
 334     dict 
*pubsub_channels
;  /* channels a client is interested in (SUBSCRIBE) */ 
 335     list 
*pubsub_patterns
;  /* patterns a client is interested in (SUBSCRIBE) */ 
 343 /* Global server state structure */ 
 348     long long dirty
;            /* changes to DB from the last save */ 
 350     list 
*slaves
, *monitors
; 
 351     char neterr
[ANET_ERR_LEN
]; 
 353     int cronloops
;              /* number of times the cron function run */ 
 354     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 355     time_t lastsave
;            /* Unix time of last save succeeede */ 
 356     /* Fields used only for stats */ 
 357     time_t stat_starttime
;         /* server start time */ 
 358     long long stat_numcommands
;    /* number of processed commands */ 
 359     long long stat_numconnections
; /* number of connections received */ 
 360     long long stat_expiredkeys
;   /* number of expired keys */ 
 373     pid_t bgsavechildpid
; 
 374     pid_t bgrewritechildpid
; 
 375     sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */ 
 376     sds aofbuf
;       /* AOF buffer, written before entering the event loop */ 
 377     struct saveparam 
*saveparams
; 
 382     char *appendfilename
; 
 386     /* Replication related */ 
 391     redisClient 
*master
;    /* client that is master for this slave */ 
 393     unsigned int maxclients
; 
 394     unsigned long long maxmemory
; 
 395     unsigned int blpop_blocked_clients
; 
 396     unsigned int vm_blocked_clients
; 
 397     /* Sort parameters - qsort_r() is only available under BSD so we 
 398      * have to keep this state global, in order to pass it to sortCompare() */ 
 402     /* Virtual memory configuration */ 
 407     unsigned long long vm_max_memory
; 
 409     size_t hash_max_zipmap_entries
; 
 410     size_t hash_max_zipmap_value
; 
 411     /* Virtual memory state */ 
 414     off_t vm_next_page
; /* Next probably empty page */ 
 415     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 416     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 417     time_t unixtime
;    /* Unix time sampled every second. */ 
 418     /* Virtual memory I/O threads stuff */ 
 419     /* An I/O thread processes an element taken from the io_newjobs queue and 
 420      * puts the result of the operation in the io_processed list. While the 
 421      * job is being processed, it is kept in the io_processing queue. */ 
 422     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 423     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 424     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 425     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 426     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 427     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 428     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 429     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 430     int io_active_threads
; /* Number of running I/O threads */ 
 431     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 432     /* Our main thread is blocked on the event loop, looking for sockets ready 
 433      * to be read or written, so when a threaded I/O operation is ready to be 
 434      * processed by the main thread, the I/O thread will use a unix pipe to 
 435      * wake up the main thread. The following are the two pipe FDs. */ 
 436     int io_ready_pipe_read
; 
 437     int io_ready_pipe_write
; 
 438     /* Virtual memory stats */ 
 439     unsigned long long vm_stats_used_pages
; 
 440     unsigned long long vm_stats_swapped_objects
; 
 441     unsigned long long vm_stats_swapouts
; 
 442     unsigned long long vm_stats_swapins
; 
 444     dict 
*pubsub_channels
; /* Map channels to list of subscribed clients */ 
 445     list 
*pubsub_patterns
; /* A list of pubsub_patterns */ 
 450 typedef struct pubsubPattern 
{ 
 455 typedef void redisCommandProc(redisClient 
*c
); 
 456 typedef void redisVmPreloadProc(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 457 struct redisCommand 
{ 
 459     redisCommandProc 
*proc
; 
 462     /* Use a function to determine which keys need to be loaded 
 463      * in the background prior to executing this command. Takes precedence 
 464      * over vm_firstkey and others, ignored when NULL */ 
 465     redisVmPreloadProc 
*vm_preload_proc
; 
 466     /* What keys should be loaded in background when calling this command? */ 
 467     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 468     int vm_lastkey
;  /* THe last argument that's a key */ 
 469     int vm_keystep
;  /* The step between first and last key */ 
 472 struct redisFunctionSym 
{ 
 474     unsigned long pointer
; 
 477 typedef struct _redisSortObject 
{ 
 485 typedef struct _redisSortOperation 
{ 
 488 } redisSortOperation
; 
 490 /* ZSETs use a specialized version of Skiplists */ 
 492 typedef struct zskiplistNode 
{ 
 493     struct zskiplistNode 
**forward
; 
 494     struct zskiplistNode 
*backward
; 
 500 typedef struct zskiplist 
{ 
 501     struct zskiplistNode 
*header
, *tail
; 
 502     unsigned long length
; 
 506 typedef struct zset 
{ 
 511 /* Our shared "common" objects */ 
 513 #define REDIS_SHARED_INTEGERS 10000 
 514 struct sharedObjectsStruct 
{ 
 515     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 516     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 517     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 518     *outofrangeerr
, *plus
, 
 519     *select0
, *select1
, *select2
, *select3
, *select4
, 
 520     *select5
, *select6
, *select7
, *select8
, *select9
, 
 521     *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
, 
 522     *mbulk4
, *psubscribebulk
, *punsubscribebulk
, 
 523     *integers
[REDIS_SHARED_INTEGERS
]; 
 526 /* Global vars that are actally used as constants. The following double 
 527  * values are used for double on-disk serialization, and are initialized 
 528  * at runtime to avoid strange compiler optimizations. */ 
 530 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 532 /* VM threaded I/O request message */ 
 533 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 534 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 535 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 536 typedef struct iojob 
{ 
 537     int type
;   /* Request type, REDIS_IOJOB_* */ 
 538     redisDb 
*db
;/* Redis database */ 
 539     robj 
*key
;  /* This I/O request is about swapping this key */ 
 540     robj 
*val
;  /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this 
 541                  * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */ 
 542     off_t page
; /* Swap page where to read/write the object */ 
 543     off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */ 
 544     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 545     pthread_t thread
; /* ID of the thread processing this entry */ 
 548 /*================================ Prototypes =============================== */ 
 550 static void freeStringObject(robj 
*o
); 
 551 static void freeListObject(robj 
*o
); 
 552 static void freeSetObject(robj 
*o
); 
 553 static void decrRefCount(void *o
); 
 554 static robj 
*createObject(int type
, void *ptr
); 
 555 static void freeClient(redisClient 
*c
); 
 556 static int rdbLoad(char *filename
); 
 557 static void addReply(redisClient 
*c
, robj 
*obj
); 
 558 static void addReplySds(redisClient 
*c
, sds s
); 
 559 static void incrRefCount(robj 
*o
); 
 560 static int rdbSaveBackground(char *filename
); 
 561 static robj 
*createStringObject(char *ptr
, size_t len
); 
 562 static robj 
*dupStringObject(robj 
*o
); 
 563 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
); 
 564 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
); 
 565 static void flushAppendOnlyFile(void); 
 566 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 567 static int syncWithMaster(void); 
 568 static robj 
*tryObjectEncoding(robj 
*o
); 
 569 static robj 
*getDecodedObject(robj 
*o
); 
 570 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 571 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 572 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 573 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
); 
 574 static int deleteKey(redisDb 
*db
, robj 
*key
); 
 575 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 576 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 577 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 578 static void freeMemoryIfNeeded(void); 
 579 static int processCommand(redisClient 
*c
); 
 580 static void setupSigSegvAction(void); 
 581 static void rdbRemoveTempFile(pid_t childpid
); 
 582 static void aofRemoveTempFile(pid_t childpid
); 
 583 static size_t stringObjectLen(robj 
*o
); 
 584 static void processInputBuffer(redisClient 
*c
); 
 585 static zskiplist 
*zslCreate(void); 
 586 static void zslFree(zskiplist 
*zsl
); 
 587 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 588 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 589 static void initClientMultiState(redisClient 
*c
); 
 590 static void freeClientMultiState(redisClient 
*c
); 
 591 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 592 static void unblockClientWaitingData(redisClient 
*c
); 
 593 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 594 static void vmInit(void); 
 595 static void vmMarkPagesFree(off_t page
, off_t count
); 
 596 static robj 
*vmLoadObject(robj 
*key
); 
 597 static robj 
*vmPreviewObject(robj 
*key
); 
 598 static int vmSwapOneObjectBlocking(void); 
 599 static int vmSwapOneObjectThreaded(void); 
 600 static int vmCanSwapOut(void); 
 601 static int tryFreeOneObjectFromFreelist(void); 
 602 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 603 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 604 static void vmCancelThreadedIOJob(robj 
*o
); 
 605 static void lockThreadedIO(void); 
 606 static void unlockThreadedIO(void); 
 607 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 608 static void freeIOJob(iojob 
*j
); 
 609 static void queueIOJob(iojob 
*j
); 
 610 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 611 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 612 static void waitEmptyIOJobsQueue(void); 
 613 static void vmReopenSwapFile(void); 
 614 static int vmFreePage(off_t page
); 
 615 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 616 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 617 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
); 
 618 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 619 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 620 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 621 static struct redisCommand 
*lookupCommand(char *name
); 
 622 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 623 static void resetClient(redisClient 
*c
); 
 624 static void convertToRealHash(robj 
*o
); 
 625 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
); 
 626 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
); 
 627 static void freePubsubPattern(void *p
); 
 628 static int listMatchPubsubPattern(void *a
, void *b
); 
 629 static int compareStringObjects(robj 
*a
, robj 
*b
); 
 630 static int equalStringObjects(robj 
*a
, robj 
*b
); 
 632 static int rewriteAppendOnlyFileBackground(void); 
 633 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
); 
 635 static void authCommand(redisClient 
*c
); 
 636 static void pingCommand(redisClient 
*c
); 
 637 static void echoCommand(redisClient 
*c
); 
 638 static void setCommand(redisClient 
*c
); 
 639 static void setnxCommand(redisClient 
*c
); 
 640 static void setexCommand(redisClient 
*c
); 
 641 static void getCommand(redisClient 
*c
); 
 642 static void delCommand(redisClient 
*c
); 
 643 static void existsCommand(redisClient 
*c
); 
 644 static void incrCommand(redisClient 
*c
); 
 645 static void decrCommand(redisClient 
*c
); 
 646 static void incrbyCommand(redisClient 
*c
); 
 647 static void decrbyCommand(redisClient 
*c
); 
 648 static void selectCommand(redisClient 
*c
); 
 649 static void randomkeyCommand(redisClient 
*c
); 
 650 static void keysCommand(redisClient 
*c
); 
 651 static void dbsizeCommand(redisClient 
*c
); 
 652 static void lastsaveCommand(redisClient 
*c
); 
 653 static void saveCommand(redisClient 
*c
); 
 654 static void bgsaveCommand(redisClient 
*c
); 
 655 static void bgrewriteaofCommand(redisClient 
*c
); 
 656 static void shutdownCommand(redisClient 
*c
); 
 657 static void moveCommand(redisClient 
*c
); 
 658 static void renameCommand(redisClient 
*c
); 
 659 static void renamenxCommand(redisClient 
*c
); 
 660 static void lpushCommand(redisClient 
*c
); 
 661 static void rpushCommand(redisClient 
*c
); 
 662 static void lpopCommand(redisClient 
*c
); 
 663 static void rpopCommand(redisClient 
*c
); 
 664 static void llenCommand(redisClient 
*c
); 
 665 static void lindexCommand(redisClient 
*c
); 
 666 static void lrangeCommand(redisClient 
*c
); 
 667 static void ltrimCommand(redisClient 
*c
); 
 668 static void typeCommand(redisClient 
*c
); 
 669 static void lsetCommand(redisClient 
*c
); 
 670 static void saddCommand(redisClient 
*c
); 
 671 static void sremCommand(redisClient 
*c
); 
 672 static void smoveCommand(redisClient 
*c
); 
 673 static void sismemberCommand(redisClient 
*c
); 
 674 static void scardCommand(redisClient 
*c
); 
 675 static void spopCommand(redisClient 
*c
); 
 676 static void srandmemberCommand(redisClient 
*c
); 
 677 static void sinterCommand(redisClient 
*c
); 
 678 static void sinterstoreCommand(redisClient 
*c
); 
 679 static void sunionCommand(redisClient 
*c
); 
 680 static void sunionstoreCommand(redisClient 
*c
); 
 681 static void sdiffCommand(redisClient 
*c
); 
 682 static void sdiffstoreCommand(redisClient 
*c
); 
 683 static void syncCommand(redisClient 
*c
); 
 684 static void flushdbCommand(redisClient 
*c
); 
 685 static void flushallCommand(redisClient 
*c
); 
 686 static void sortCommand(redisClient 
*c
); 
 687 static void lremCommand(redisClient 
*c
); 
 688 static void rpoplpushcommand(redisClient 
*c
); 
 689 static void infoCommand(redisClient 
*c
); 
 690 static void mgetCommand(redisClient 
*c
); 
 691 static void monitorCommand(redisClient 
*c
); 
 692 static void expireCommand(redisClient 
*c
); 
 693 static void expireatCommand(redisClient 
*c
); 
 694 static void getsetCommand(redisClient 
*c
); 
 695 static void ttlCommand(redisClient 
*c
); 
 696 static void slaveofCommand(redisClient 
*c
); 
 697 static void debugCommand(redisClient 
*c
); 
 698 static void msetCommand(redisClient 
*c
); 
 699 static void msetnxCommand(redisClient 
*c
); 
 700 static void zaddCommand(redisClient 
*c
); 
 701 static void zincrbyCommand(redisClient 
*c
); 
 702 static void zrangeCommand(redisClient 
*c
); 
 703 static void zrangebyscoreCommand(redisClient 
*c
); 
 704 static void zcountCommand(redisClient 
*c
); 
 705 static void zrevrangeCommand(redisClient 
*c
); 
 706 static void zcardCommand(redisClient 
*c
); 
 707 static void zremCommand(redisClient 
*c
); 
 708 static void zscoreCommand(redisClient 
*c
); 
 709 static void zremrangebyscoreCommand(redisClient 
*c
); 
 710 static void multiCommand(redisClient 
*c
); 
 711 static void execCommand(redisClient 
*c
); 
 712 static void discardCommand(redisClient 
*c
); 
 713 static void blpopCommand(redisClient 
*c
); 
 714 static void brpopCommand(redisClient 
*c
); 
 715 static void appendCommand(redisClient 
*c
); 
 716 static void substrCommand(redisClient 
*c
); 
 717 static void zrankCommand(redisClient 
*c
); 
 718 static void zrevrankCommand(redisClient 
*c
); 
 719 static void hsetCommand(redisClient 
*c
); 
 720 static void hsetnxCommand(redisClient 
*c
); 
 721 static void hgetCommand(redisClient 
*c
); 
 722 static void hmsetCommand(redisClient 
*c
); 
 723 static void hmgetCommand(redisClient 
*c
); 
 724 static void hdelCommand(redisClient 
*c
); 
 725 static void hlenCommand(redisClient 
*c
); 
 726 static void zremrangebyrankCommand(redisClient 
*c
); 
 727 static void zunionstoreCommand(redisClient 
*c
); 
 728 static void zinterstoreCommand(redisClient 
*c
); 
 729 static void hkeysCommand(redisClient 
*c
); 
 730 static void hvalsCommand(redisClient 
*c
); 
 731 static void hgetallCommand(redisClient 
*c
); 
 732 static void hexistsCommand(redisClient 
*c
); 
 733 static void configCommand(redisClient 
*c
); 
 734 static void hincrbyCommand(redisClient 
*c
); 
 735 static void subscribeCommand(redisClient 
*c
); 
 736 static void unsubscribeCommand(redisClient 
*c
); 
 737 static void psubscribeCommand(redisClient 
*c
); 
 738 static void punsubscribeCommand(redisClient 
*c
); 
 739 static void publishCommand(redisClient 
*c
); 
 741 /*================================= Globals ================================= */ 
 744 static struct redisServer server
; /* server global state */ 
 745 static struct redisCommand cmdTable
[] = { 
 746     {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 747     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 748     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 749     {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 750     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 751     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 752     {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 753     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 754     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 755     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 756     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1}, 
 757     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 758     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 759     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 760     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 761     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 762     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 763     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 764     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 765     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 766     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 767     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 768     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 769     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1}, 
 770     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 771     {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 772     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1}, 
 773     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 774     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 775     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 776     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 777     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 778     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 779     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 780     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 781     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 782     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 783     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 784     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 785     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 786     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 787     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 788     {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 789     {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 790     {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 791     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 792     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 793     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 794     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 795     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 796     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 797     {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 798     {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 799     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 800     {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 801     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 802     {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 803     {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 804     {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 805     {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 806     {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 807     {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 808     {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 809     {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 810     {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 811     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 812     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 813     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 814     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 815     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 816     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 817     {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 818     {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 819     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 820     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 821     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 822     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 823     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 824     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 825     {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 826     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 827     {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 828     {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 829     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 830     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 831     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 832     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 833     {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 834     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 835     {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0}, 
 836     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 837     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 838     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 839     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 840     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 841     {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 842     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 843     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 844     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 845     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 846     {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 847     {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 848     {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 849     {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 850     {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 851     {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0}, 
 852     {NULL
,NULL
,0,0,NULL
,0,0,0} 
 855 /*============================ Utility functions ============================ */ 
 857 /* Glob-style pattern matching. */ 
 858 static int stringmatchlen(const char *pattern
, int patternLen
, 
 859         const char *string
, int stringLen
, int nocase
) 
 864             while (pattern
[1] == '*') { 
 869                 return 1; /* match */ 
 871                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 872                             string
, stringLen
, nocase
)) 
 873                     return 1; /* match */ 
 877             return 0; /* no match */ 
 881                 return 0; /* no match */ 
 891             not = pattern
[0] == '^'; 
 898                 if (pattern
[0] == '\\') { 
 901                     if (pattern
[0] == string
[0]) 
 903                 } else if (pattern
[0] == ']') { 
 905                 } else if (patternLen 
== 0) { 
 909                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 910                     int start 
= pattern
[0]; 
 911                     int end 
= pattern
[2]; 
 919                         start 
= tolower(start
); 
 925                     if (c 
>= start 
&& c 
<= end
) 
 929                         if (pattern
[0] == string
[0]) 
 932                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 942                 return 0; /* no match */ 
 948             if (patternLen 
>= 2) { 
 955                 if (pattern
[0] != string
[0]) 
 956                     return 0; /* no match */ 
 958                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 959                     return 0; /* no match */ 
 967         if (stringLen 
== 0) { 
 968             while(*pattern 
== '*') { 
 975     if (patternLen 
== 0 && stringLen 
== 0) 
 980 static int stringmatch(const char *pattern
, const char *string
, int nocase
) { 
 981     return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
); 
 984 /* Convert a string representing an amount of memory into the number of 
 985  * bytes, so for instance memtoll("1Gi") will return 1073741824 that is 
 988  * On parsing error, if *err is not NULL, it's set to 1, otherwise it's 
 990 static long long memtoll(const char *p
, int *err
) { 
 993     long mul
; /* unit multiplier */ 
 998     /* Search the first non digit character. */ 
1001     while(*u 
&& isdigit(*u
)) u
++; 
1002     if (*u 
== '\0' || !strcasecmp(u
,"b")) { 
1004     } else if (!strcasecmp(u
,"k")) { 
1006     } else if (!strcasecmp(u
,"kb")) { 
1008     } else if (!strcasecmp(u
,"m")) { 
1010     } else if (!strcasecmp(u
,"mb")) { 
1012     } else if (!strcasecmp(u
,"g")) { 
1013         mul 
= 1000L*1000*1000; 
1014     } else if (!strcasecmp(u
,"gb")) { 
1015         mul 
= 1024L*1024*1024; 
1021     if (digits 
>= sizeof(buf
)) { 
1025     memcpy(buf
,p
,digits
); 
1027     val 
= strtoll(buf
,NULL
,10); 
/* Convert a long long into a string. Returns the number of characters
 * stored in 's', not counting the nul terminator. This can be shorter than
 * the full representation of the number when the passed buffer length
 * 'len' is not enough to store it: the output is then truncated to len-1
 * characters. 's' is always nul terminated, unless 'len' is 0, in which
 * case nothing is written and 0 is returned. */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Take the magnitude using unsigned arithmetic: negating LLONG_MIN as
     * a signed long long overflows, while the unsigned negation is well
     * defined and yields the correct magnitude. */
    v = (value < 0) ? 0ULL-(unsigned long long)value
                    : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    do {
        *p-- = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++;
    l = 32-(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return (int)l;
}
1055 static void redisLog(int level
, const char *fmt
, ...) { 
1059     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
1063     if (level 
>= server
.verbosity
) { 
1069         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
1070         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
1071         vfprintf(fp
, fmt
, ap
); 
1077     if (server
.logfile
) fclose(fp
); 
1080 /*====================== Hash table type implementation  ==================== */ 
1082 /* This is a hash table type that uses the SDS dynamic strings library as 
1083  * keys and redis objects as values (objects can hold SDS strings, 
1086 static void dictVanillaFree(void *privdata
, void *val
) 
1088     DICT_NOTUSED(privdata
); 
1092 static void dictListDestructor(void *privdata
, void *val
) 
1094     DICT_NOTUSED(privdata
); 
1095     listRelease((list
*)val
); 
1098 static int sdsDictKeyCompare(void *privdata
, const void *key1
, 
1102     DICT_NOTUSED(privdata
); 
1104     l1 
= sdslen((sds
)key1
); 
1105     l2 
= sdslen((sds
)key2
); 
1106     if (l1 
!= l2
) return 0; 
1107     return memcmp(key1
, key2
, l1
) == 0; 
1110 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
1112     DICT_NOTUSED(privdata
); 
1114     if (val 
== NULL
) return; /* Values of swapped out keys as set to NULL */ 
1118 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
1121     const robj 
*o1 
= key1
, *o2 
= key2
; 
1122     return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1125 static unsigned int dictObjHash(const void *key
) { 
1126     const robj 
*o 
= key
; 
1127     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1130 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
1133     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
1136     if (o1
->encoding 
== REDIS_ENCODING_INT 
&& 
1137         o2
->encoding 
== REDIS_ENCODING_INT
) 
1138             return o1
->ptr 
== o2
->ptr
; 
1140     o1 
= getDecodedObject(o1
); 
1141     o2 
= getDecodedObject(o2
); 
1142     cmp 
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1148 static unsigned int dictEncObjHash(const void *key
) { 
1149     robj 
*o 
= (robj
*) key
; 
1151     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
1152         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1154         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1158             len 
= ll2string(buf
,32,(long)o
->ptr
); 
1159             return dictGenHashFunction((unsigned char*)buf
, len
); 
1163             o 
= getDecodedObject(o
); 
1164             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1171 /* Sets type and expires */ 
1172 static dictType setDictType 
= { 
1173     dictEncObjHash
,            /* hash function */ 
1176     dictEncObjKeyCompare
,      /* key compare */ 
1177     dictRedisObjectDestructor
, /* key destructor */ 
1178     NULL                       
/* val destructor */ 
1181 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1182 static dictType zsetDictType 
= { 
1183     dictEncObjHash
,            /* hash function */ 
1186     dictEncObjKeyCompare
,      /* key compare */ 
1187     dictRedisObjectDestructor
, /* key destructor */ 
1188     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1192 static dictType dbDictType 
= { 
1193     dictObjHash
,                /* hash function */ 
1196     dictObjKeyCompare
,          /* key compare */ 
1197     dictRedisObjectDestructor
,  /* key destructor */ 
1198     dictRedisObjectDestructor   
/* val destructor */ 
1202 static dictType keyptrDictType 
= { 
1203     dictObjHash
,               /* hash function */ 
1206     dictObjKeyCompare
,         /* key compare */ 
1207     dictRedisObjectDestructor
, /* key destructor */ 
1208     NULL                       
/* val destructor */ 
1211 /* Hash type hash table (note that small hashes are represented with zipmaps) */ 
1212 static dictType hashDictType 
= { 
1213     dictEncObjHash
,             /* hash function */ 
1216     dictEncObjKeyCompare
,       /* key compare */ 
1217     dictRedisObjectDestructor
,  /* key destructor */ 
1218     dictRedisObjectDestructor   
/* val destructor */ 
1221 /* Keylist hash table type has unencoded redis objects as keys and 
1222  * lists as values. It's used for blocking operations (BLPOP) and to 
1223  * map swapped keys to a list of clients waiting for these keys to be loaded. */ 
1224 static dictType keylistDictType 
= { 
1225     dictObjHash
,                /* hash function */ 
1228     dictObjKeyCompare
,          /* key compare */ 
1229     dictRedisObjectDestructor
,  /* key destructor */ 
1230     dictListDestructor          
/* val destructor */ 
1233 static void version(); 
1235 /* ========================= Random utility functions ======================= */ 
1237 /* Redis generally does not try to recover from out of memory conditions 
1238  * when allocating objects or strings, it is not clear if it will be possible 
1239  * to report this condition to the client since the networking layer itself 
1240  * is based on heap allocation for send buffers, so we simply abort. 
1241  * At least the code will be simpler to read... */ 
1242 static void oom(const char *msg
) { 
1243     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1248 /* ====================== Redis server networking stuff ===================== */ 
1249 static void closeTimedoutClients(void) { 
1252     time_t now 
= time(NULL
); 
1255     listRewind(server
.clients
,&li
); 
1256     while ((ln 
= listNext(&li
)) != NULL
) { 
1257         c 
= listNodeValue(ln
); 
1258         if (server
.maxidletime 
&& 
1259             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1260             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1261             dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */ 
1262             listLength(c
->pubsub_patterns
) == 0 && 
1263             (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1265             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1267         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1268             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1269                 addReply(c
,shared
.nullmultibulk
); 
1270                 unblockClientWaitingData(c
); 
1276 static int htNeedsResize(dict 
*dict
) { 
1277     long long size
, used
; 
1279     size 
= dictSlots(dict
); 
1280     used 
= dictSize(dict
); 
1281     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1282             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1285 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL 
1286  * we resize the hash table to save memory */ 
1287 static void tryResizeHashTables(void) { 
1290     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1291         if (htNeedsResize(server
.db
[j
].dict
)) 
1292             dictResize(server
.db
[j
].dict
); 
1293         if (htNeedsResize(server
.db
[j
].expires
)) 
1294             dictResize(server
.db
[j
].expires
); 
1298 /* Our hash table implementation performs rehashing incrementally while 
1299  * we write/read from the hash table. Still if the server is idle, the hash 
1300  * table will use two tables for a long time. So we try to use 1 millisecond 
1301  * of CPU time at every serverCron() loop in order to rehash some key. */ 
1302 static void incrementallyRehash(void) { 
1305     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1306         if (dictIsRehashing(server
.db
[j
].dict
)) { 
1307             dictRehashMilliseconds(server
.db
[j
].dict
,1); 
1308             break; /* already used our millisecond for this loop... */ 
1313 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
1314 void backgroundSaveDoneHandler(int statloc
) { 
1315     int exitcode 
= WEXITSTATUS(statloc
); 
1316     int bysignal 
= WIFSIGNALED(statloc
); 
1318     if (!bysignal 
&& exitcode 
== 0) { 
1319         redisLog(REDIS_NOTICE
, 
1320             "Background saving terminated with success"); 
1322         server
.lastsave 
= time(NULL
); 
1323     } else if (!bysignal 
&& exitcode 
!= 0) { 
1324         redisLog(REDIS_WARNING
, "Background saving error"); 
1326         redisLog(REDIS_WARNING
, 
1327             "Background saving terminated by signal %d", WTERMSIG(statloc
)); 
1328         rdbRemoveTempFile(server
.bgsavechildpid
); 
1330     server
.bgsavechildpid 
= -1; 
1331     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1332      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1333     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1336 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1338 void backgroundRewriteDoneHandler(int statloc
) { 
1339     int exitcode 
= WEXITSTATUS(statloc
); 
1340     int bysignal 
= WIFSIGNALED(statloc
); 
1342     if (!bysignal 
&& exitcode 
== 0) { 
1346         redisLog(REDIS_NOTICE
, 
1347             "Background append only file rewriting terminated with success"); 
1348         /* Now it's time to flush the differences accumulated by the parent */ 
1349         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1350         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1352             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1355         /* Flush our data... */ 
1356         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1357                 (signed) sdslen(server
.bgrewritebuf
)) { 
1358             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1362         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1363         /* Now our work is to rename the temp file into the stable file. And 
1364          * switch the file descriptor used by the server for append only. */ 
1365         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1366             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1370         /* Mission completed... almost */ 
1371         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1372         if (server
.appendfd 
!= -1) { 
1373             /* If append only is actually enabled... */ 
1374             close(server
.appendfd
); 
1375             server
.appendfd 
= fd
; 
1377             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1378             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1380             /* If append only is disabled we just generate a dump in this 
1381              * format. Why not? */ 
1384     } else if (!bysignal 
&& exitcode 
!= 0) { 
1385         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1387         redisLog(REDIS_WARNING
, 
1388             "Background append only file rewriting terminated by signal %d", 
1392     sdsfree(server
.bgrewritebuf
); 
1393     server
.bgrewritebuf 
= sdsempty(); 
1394     aofRemoveTempFile(server
.bgrewritechildpid
); 
1395     server
.bgrewritechildpid 
= -1; 
1398 /* This function is called once a background process of some kind terminates, 
1399  * as we want to avoid resizing the hash tables when there is a child in order 
1400  * to play well with copy-on-write (otherwise when a resize happens lots of 
1401  * memory pages are copied). The goal of this function is to update the ability 
1402  * for dict.c to resize the hash tables according to whether or not we have 
1403  * running children. */ 
1404 static void updateDictResizePolicy(void) { 
1405     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) 
1408         dictDisableResize(); 
1411 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1412     int j
, loops 
= server
.cronloops
++; 
1413     REDIS_NOTUSED(eventLoop
); 
1415     REDIS_NOTUSED(clientData
); 
1417     /* We take a cached value of the unix time in the global state because 
1418      * with virtual memory and aging we need to store the current time 
1419      * in objects at every object access, and accuracy is not needed. 
1420      * To access a global var is faster than calling time(NULL) */ 
1421     server
.unixtime 
= time(NULL
); 
1423     /* Show some info about non-empty databases */ 
1424     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1425         long long size
, used
, vkeys
; 
1427         size 
= dictSlots(server
.db
[j
].dict
); 
1428         used 
= dictSize(server
.db
[j
].dict
); 
1429         vkeys 
= dictSize(server
.db
[j
].expires
); 
1430         if (!(loops 
% 50) && (used 
|| vkeys
)) { 
1431             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1432             /* dictPrintStats(server.dict); */ 
1436     /* We don't want to resize the hash tables while a background saving 
1437      * is in progress: the saving child is created using fork() that is 
1438      * implemented with a copy-on-write semantic in most modern systems, so 
1439      * if we resize the HT while there is the saving child at work actually 
1440      * a lot of memory movements in the parent will cause a lot of pages 
1442     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) { 
1443         if (!(loops 
% 10)) tryResizeHashTables(); 
1444         if (server
.activerehashing
) incrementallyRehash(); 
1447     /* Show information about connected clients */ 
1448     if (!(loops 
% 50)) { 
1449         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use", 
1450             listLength(server
.clients
)-listLength(server
.slaves
), 
1451             listLength(server
.slaves
), 
1452             zmalloc_used_memory()); 
1455     /* Close connections of timedout clients */ 
1456     if ((server
.maxidletime 
&& !(loops 
% 100)) || server
.blpop_blocked_clients
) 
1457         closeTimedoutClients(); 
1459     /* Check if a background saving or AOF rewrite in progress terminated */ 
1460     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1464         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1465             if (pid 
== server
.bgsavechildpid
) { 
1466                 backgroundSaveDoneHandler(statloc
); 
1468                 backgroundRewriteDoneHandler(statloc
); 
1470             updateDictResizePolicy(); 
1473         /* If there is not a background saving in progress check if 
1474          * we have to save now */ 
1475          time_t now 
= time(NULL
); 
1476          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1477             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1479             if (server
.dirty 
>= sp
->changes 
&& 
1480                 now
-server
.lastsave 
> sp
->seconds
) { 
1481                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1482                     sp
->changes
, sp
->seconds
); 
1483                 rdbSaveBackground(server
.dbfilename
); 
1489     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1490      * will use few CPU cycles if there are few expiring keys, otherwise 
1491      * it will get more aggressive to avoid that too much memory is used by 
1492      * keys that can be removed from the keyspace. */ 
1493     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1495         redisDb 
*db 
= server
.db
+j
; 
1497         /* Continue to expire if at the end of the cycle more than 25% 
1498          * of the keys were expired. */ 
1500             long num 
= dictSize(db
->expires
); 
1501             time_t now 
= time(NULL
); 
1504             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1505                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1510                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1511                 t 
= (time_t) dictGetEntryVal(de
); 
1513                     deleteKey(db
,dictGetEntryKey(de
)); 
1515                     server
.stat_expiredkeys
++; 
1518         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1521     /* Swap a few keys on disk if we are over the memory limit and VM 
1522      * is enabled. Try to free objects from the free list first. */ 
1523     if (vmCanSwapOut()) { 
1524         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1525                 server
.vm_max_memory
) 
1529             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1530             retval 
= (server
.vm_max_threads 
== 0) ? 
1531                         vmSwapOneObjectBlocking() : 
1532                         vmSwapOneObjectThreaded(); 
1533             if (retval 
== REDIS_ERR 
&& !(loops 
% 300) && 
1534                 zmalloc_used_memory() > 
1535                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1537                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1539             /* Note that when using threaded I/O we free just one object, 
1540              * because anyway when the I/O thread in charge to swap this 
1541              * object out will finish, the handler of completed jobs 
1542              * will try to swap more objects if we are still out of memory. */ 
1543             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1547     /* Check if we should connect to a MASTER */ 
1548     if (server
.replstate 
== REDIS_REPL_CONNECT 
&& !(loops 
% 10)) { 
1549         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1550         if (syncWithMaster() == REDIS_OK
) { 
1551             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1552             if (server
.appendonly
) rewriteAppendOnlyFileBackground(); 
1558 /* This function gets called every time Redis is entering the 
1559  * main loop of the event driven library, that is, before to sleep 
1560  * for ready file descriptors. */ 
1561 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1562     REDIS_NOTUSED(eventLoop
); 
1564     /* Awake clients that got all the swapped keys they requested */ 
1565     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1569         listRewind(server
.io_ready_clients
,&li
); 
1570         while((ln 
= listNext(&li
))) { 
1571             redisClient 
*c 
= ln
->value
; 
1572             struct redisCommand 
*cmd
; 
1574             /* Resume the client. */ 
1575             listDelNode(server
.io_ready_clients
,ln
); 
1576             c
->flags 
&= (~REDIS_IO_WAIT
); 
1577             server
.vm_blocked_clients
--; 
1578             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1579                 readQueryFromClient
, c
); 
1580             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1581             assert(cmd 
!= NULL
); 
1584             /* There may be more data to process in the input buffer. */ 
1585             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1586                 processInputBuffer(c
); 
1589     /* Write the AOF buffer on disk */ 
1590     flushAppendOnlyFile(); 
1593 static void createSharedObjects(void) { 
1596     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1597     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1598     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1599     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1600     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1601     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1602     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1603     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1604     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1605     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1606     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1607     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1608         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1609     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1610         "-ERR no such key\r\n")); 
1611     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1612         "-ERR syntax error\r\n")); 
1613     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1614         "-ERR source and destination objects are the same\r\n")); 
1615     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1616         "-ERR index out of range\r\n")); 
1617     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1618     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1619     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1620     shared
.select0 
= createStringObject("select 0\r\n",10); 
1621     shared
.select1 
= createStringObject("select 1\r\n",10); 
1622     shared
.select2 
= createStringObject("select 2\r\n",10); 
1623     shared
.select3 
= createStringObject("select 3\r\n",10); 
1624     shared
.select4 
= createStringObject("select 4\r\n",10); 
1625     shared
.select5 
= createStringObject("select 5\r\n",10); 
1626     shared
.select6 
= createStringObject("select 6\r\n",10); 
1627     shared
.select7 
= createStringObject("select 7\r\n",10); 
1628     shared
.select8 
= createStringObject("select 8\r\n",10); 
1629     shared
.select9 
= createStringObject("select 9\r\n",10); 
1630     shared
.messagebulk 
= createStringObject("$7\r\nmessage\r\n",13); 
1631     shared
.pmessagebulk 
= createStringObject("$8\r\npmessage\r\n",14); 
1632     shared
.subscribebulk 
= createStringObject("$9\r\nsubscribe\r\n",15); 
1633     shared
.unsubscribebulk 
= createStringObject("$11\r\nunsubscribe\r\n",18); 
1634     shared
.psubscribebulk 
= createStringObject("$10\r\npsubscribe\r\n",17); 
1635     shared
.punsubscribebulk 
= createStringObject("$12\r\npunsubscribe\r\n",19); 
1636     shared
.mbulk3 
= createStringObject("*3\r\n",4); 
1637     shared
.mbulk4 
= createStringObject("*4\r\n",4); 
1638     for (j 
= 0; j 
< REDIS_SHARED_INTEGERS
; j
++) { 
1639         shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
); 
1640         shared
.integers
[j
]->encoding 
= REDIS_ENCODING_INT
; 
1644 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1645     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1646     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1647     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1648     server
.saveparamslen
++; 
1651 static void resetServerSaveParams() { 
1652     zfree(server
.saveparams
); 
1653     server
.saveparams 
= NULL
; 
1654     server
.saveparamslen 
= 0; 
1657 static void initServerConfig() { 
1658     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1659     server
.port 
= REDIS_SERVERPORT
; 
1660     server
.verbosity 
= REDIS_VERBOSE
; 
1661     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1662     server
.saveparams 
= NULL
; 
1663     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1664     server
.bindaddr 
= NULL
; 
1665     server
.glueoutputbuf 
= 1; 
1666     server
.daemonize 
= 0; 
1667     server
.appendonly 
= 0; 
1668     server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1669     server
.lastfsync 
= time(NULL
); 
1670     server
.appendfd 
= -1; 
1671     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1672     server
.pidfile 
= zstrdup("/var/run/redis.pid"); 
1673     server
.dbfilename 
= zstrdup("dump.rdb"); 
1674     server
.appendfilename 
= zstrdup("appendonly.aof"); 
1675     server
.requirepass 
= NULL
; 
1676     server
.rdbcompression 
= 1; 
1677     server
.activerehashing 
= 1; 
1678     server
.maxclients 
= 0; 
1679     server
.blpop_blocked_clients 
= 0; 
1680     server
.maxmemory 
= 0; 
1681     server
.vm_enabled 
= 0; 
1682     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1683     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1684     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1685     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1686     server
.vm_max_threads 
= 4; 
1687     server
.vm_blocked_clients 
= 0; 
1688     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1689     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
1691     resetServerSaveParams(); 
1693     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1694     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1695     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1696     /* Replication related */ 
1698     server
.masterauth 
= NULL
; 
1699     server
.masterhost 
= NULL
; 
1700     server
.masterport 
= 6379; 
1701     server
.master 
= NULL
; 
1702     server
.replstate 
= REDIS_REPL_NONE
; 
1704     /* Double constants initialization */ 
1706     R_PosInf 
= 1.0/R_Zero
; 
1707     R_NegInf 
= -1.0/R_Zero
; 
1708     R_Nan 
= R_Zero
/R_Zero
; 
1711 static void initServer() { 
1714     signal(SIGHUP
, SIG_IGN
); 
1715     signal(SIGPIPE
, SIG_IGN
); 
1716     setupSigSegvAction(); 
1718     server
.devnull 
= fopen("/dev/null","w"); 
1719     if (server
.devnull 
== NULL
) { 
1720         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1723     server
.clients 
= listCreate(); 
1724     server
.slaves 
= listCreate(); 
1725     server
.monitors 
= listCreate(); 
1726     server
.objfreelist 
= listCreate(); 
1727     createSharedObjects(); 
1728     server
.el 
= aeCreateEventLoop(); 
1729     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1730     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1731     if (server
.fd 
== -1) { 
1732         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1735     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1736         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1737         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1738         server
.db
[j
].blockingkeys 
= dictCreate(&keylistDictType
,NULL
); 
1739         if (server
.vm_enabled
) 
1740             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1741         server
.db
[j
].id 
= j
; 
1743     server
.pubsub_channels 
= dictCreate(&keylistDictType
,NULL
); 
1744     server
.pubsub_patterns 
= listCreate(); 
1745     listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
); 
1746     listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
); 
1747     server
.cronloops 
= 0; 
1748     server
.bgsavechildpid 
= -1; 
1749     server
.bgrewritechildpid 
= -1; 
1750     server
.bgrewritebuf 
= sdsempty(); 
1751     server
.aofbuf 
= sdsempty(); 
1752     server
.lastsave 
= time(NULL
); 
1754     server
.stat_numcommands 
= 0; 
1755     server
.stat_numconnections 
= 0; 
1756     server
.stat_expiredkeys 
= 0; 
1757     server
.stat_starttime 
= time(NULL
); 
1758     server
.unixtime 
= time(NULL
); 
1759     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1760     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1761         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1763     if (server
.appendonly
) { 
1764         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1765         if (server
.appendfd 
== -1) { 
1766             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1772     if (server
.vm_enabled
) vmInit(); 
1775 /* Empty the whole database */ 
1776 static long long emptyDb() { 
1778     long long removed 
= 0; 
1780     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1781         removed 
+= dictSize(server
.db
[j
].dict
); 
1782         dictEmpty(server
.db
[j
].dict
); 
1783         dictEmpty(server
.db
[j
].expires
); 
1788 static int yesnotoi(char *s
) { 
1789     if (!strcasecmp(s
,"yes")) return 1; 
1790     else if (!strcasecmp(s
,"no")) return 0; 
1794 /* I agree, this is a very rudimentary way to load a configuration...
1795    It will be improved later if the config gets more complex. */
1796 static void loadServerConfig(char *filename
) { 
1798     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1802     if (filename
[0] == '-' && filename
[1] == '\0') 
1805         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1806             redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
); 
1811     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1817         line 
= sdstrim(line
," \t\r\n"); 
1819         /* Skip comments and blank lines*/ 
1820         if (line
[0] == '#' || line
[0] == '\0') { 
1825         /* Split into arguments */ 
1826         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1827         sdstolower(argv
[0]); 
1829         /* Execute config directives */ 
1830         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1831             server
.maxidletime 
= atoi(argv
[1]); 
1832             if (server
.maxidletime 
< 0) { 
1833                 err 
= "Invalid timeout value"; goto loaderr
; 
1835         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1836             server
.port 
= atoi(argv
[1]); 
1837             if (server
.port 
< 1 || server
.port 
> 65535) { 
1838                 err 
= "Invalid port"; goto loaderr
; 
1840         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1841             server
.bindaddr 
= zstrdup(argv
[1]); 
1842         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1843             int seconds 
= atoi(argv
[1]); 
1844             int changes 
= atoi(argv
[2]); 
1845             if (seconds 
< 1 || changes 
< 0) { 
1846                 err 
= "Invalid save parameters"; goto loaderr
; 
1848             appendServerSaveParams(seconds
,changes
); 
1849         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1850             if (chdir(argv
[1]) == -1) { 
1851                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1852                     argv
[1], strerror(errno
)); 
1855         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1856             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1857             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1858             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1859             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1861                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1864         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1867             server
.logfile 
= zstrdup(argv
[1]); 
1868             if (!strcasecmp(server
.logfile
,"stdout")) { 
1869                 zfree(server
.logfile
); 
1870                 server
.logfile 
= NULL
; 
1872             if (server
.logfile
) { 
1873                 /* Test if we are able to open the file. The server will not 
1874                  * be able to abort just for this problem later... */ 
1875                 logfp 
= fopen(server
.logfile
,"a"); 
1876                 if (logfp 
== NULL
) { 
1877                     err 
= sdscatprintf(sdsempty(), 
1878                         "Can't open the log file: %s", strerror(errno
)); 
1883         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1884             server
.dbnum 
= atoi(argv
[1]); 
1885             if (server
.dbnum 
< 1) { 
1886                 err 
= "Invalid number of databases"; goto loaderr
; 
1888         } else if (!strcasecmp(argv
[0],"include") && argc 
== 2) { 
1889             loadServerConfig(argv
[1]); 
1890         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1891             server
.maxclients 
= atoi(argv
[1]); 
1892         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1893             server
.maxmemory 
= memtoll(argv
[1],NULL
); 
1894         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1895             server
.masterhost 
= sdsnew(argv
[1]); 
1896             server
.masterport 
= atoi(argv
[2]); 
1897             server
.replstate 
= REDIS_REPL_CONNECT
; 
1898         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1899                 server
.masterauth 
= zstrdup(argv
[1]); 
1900         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1901             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1902                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1904         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1905             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1906                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1908         } else if (!strcasecmp(argv
[0],"activerehashing") && argc 
== 2) { 
1909             if ((server
.activerehashing 
= yesnotoi(argv
[1])) == -1) { 
1910                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1912         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1913             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1914                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1916         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1917             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1918                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1920         } else if (!strcasecmp(argv
[0],"appendfilename") && argc 
== 2) { 
1921             zfree(server
.appendfilename
); 
1922             server
.appendfilename 
= zstrdup(argv
[1]); 
1923         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
1924             if (!strcasecmp(argv
[1],"no")) { 
1925                 server
.appendfsync 
= APPENDFSYNC_NO
; 
1926             } else if (!strcasecmp(argv
[1],"always")) { 
1927                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
1928             } else if (!strcasecmp(argv
[1],"everysec")) { 
1929                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1931                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
1934         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
1935             server
.requirepass 
= zstrdup(argv
[1]); 
1936         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
1937             zfree(server
.pidfile
); 
1938             server
.pidfile 
= zstrdup(argv
[1]); 
1939         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
1940             zfree(server
.dbfilename
); 
1941             server
.dbfilename 
= zstrdup(argv
[1]); 
1942         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
1943             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
1944                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1946         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
1947             zfree(server
.vm_swap_file
); 
1948             server
.vm_swap_file 
= zstrdup(argv
[1]); 
1949         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
1950             server
.vm_max_memory 
= memtoll(argv
[1],NULL
); 
1951         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
1952             server
.vm_page_size 
= memtoll(argv
[1], NULL
); 
1953         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
1954             server
.vm_pages 
= memtoll(argv
[1], NULL
); 
1955         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
1956             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
1957         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
1958             server
.hash_max_zipmap_entries 
= memtoll(argv
[1], NULL
); 
1959         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
1960             server
.hash_max_zipmap_value 
= memtoll(argv
[1], NULL
); 
1962             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
1964         for (j 
= 0; j 
< argc
; j
++) 
1969     if (fp 
!= stdin
) fclose(fp
); 
1973     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
1974     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
1975     fprintf(stderr
, ">>> '%s'\n", line
); 
1976     fprintf(stderr
, "%s\n", err
); 
1980 static void freeClientArgv(redisClient 
*c
) { 
1983     for (j 
= 0; j 
< c
->argc
; j
++) 
1984         decrRefCount(c
->argv
[j
]); 
1985     for (j 
= 0; j 
< c
->mbargc
; j
++) 
1986         decrRefCount(c
->mbargv
[j
]); 
1991 static void freeClient(redisClient 
*c
) { 
1994     /* Note that if the client we are freeing is blocked in a blocking
1995      * call, we have to set querybuf to NULL *before* calling
1996      * unblockClientWaitingData(), so that processInputBuffer() does not get
1997      * called. Also, it is important to remove the file events after
1998      * this, because this call adds the READABLE event. */
1999     sdsfree(c
->querybuf
); 
2001     if (c
->flags 
& REDIS_BLOCKED
) 
2002         unblockClientWaitingData(c
); 
2004     /* Unsubscribe from all the pubsub channels */ 
2005     pubsubUnsubscribeAllChannels(c
,0); 
2006     pubsubUnsubscribeAllPatterns(c
,0); 
2007     dictRelease(c
->pubsub_channels
); 
2008     listRelease(c
->pubsub_patterns
); 
2009     /* Obvious cleanup */ 
2010     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
2011     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2012     listRelease(c
->reply
); 
2015     /* Remove from the list of clients */ 
2016     ln 
= listSearchKey(server
.clients
,c
); 
2017     redisAssert(ln 
!= NULL
); 
2018     listDelNode(server
.clients
,ln
); 
2019     /* Remove from the list of clients waiting for swapped keys */ 
2020     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
2021         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
2023             listDelNode(server
.io_ready_clients
,ln
); 
2024             server
.vm_blocked_clients
--; 
2027     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
2028         ln 
= listFirst(c
->io_keys
); 
2029         dontWaitForSwappedKey(c
,ln
->value
); 
2031     listRelease(c
->io_keys
); 
2032     /* Master/slave cleanup */ 
2033     if (c
->flags 
& REDIS_SLAVE
) { 
2034         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
2036         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
2037         ln 
= listSearchKey(l
,c
); 
2038         redisAssert(ln 
!= NULL
); 
2041     if (c
->flags 
& REDIS_MASTER
) { 
2042         server
.master 
= NULL
; 
2043         server
.replstate 
= REDIS_REPL_CONNECT
; 
2045     /* Release memory */ 
2048     freeClientMultiState(c
); 
2052 #define GLUEREPLY_UP_TO (1024) 
2053 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
2055     char buf
[GLUEREPLY_UP_TO
]; 
2060     listRewind(c
->reply
,&li
); 
2061     while((ln 
= listNext(&li
))) { 
2065         objlen 
= sdslen(o
->ptr
); 
2066         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
2067             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
2069             listDelNode(c
->reply
,ln
); 
2071             if (copylen 
== 0) return; 
2075     /* Now the output buffer is empty, add the new single element */ 
2076     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
2077     listAddNodeHead(c
->reply
,o
); 
2080 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2081     redisClient 
*c 
= privdata
; 
2082     int nwritten 
= 0, totwritten 
= 0, objlen
; 
2085     REDIS_NOTUSED(mask
); 
2087     /* Use writev() if we have enough buffers to send */ 
2088     if (!server
.glueoutputbuf 
&& 
2089         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&& 
2090         !(c
->flags 
& REDIS_MASTER
)) 
2092         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
2096     while(listLength(c
->reply
)) { 
2097         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
2098             glueReplyBuffersIfNeeded(c
); 
2100         o 
= listNodeValue(listFirst(c
->reply
)); 
2101         objlen 
= sdslen(o
->ptr
); 
2104             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2108         if (c
->flags 
& REDIS_MASTER
) { 
2109             /* Don't reply to a master */ 
2110             nwritten 
= objlen 
- c
->sentlen
; 
2112             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
2113             if (nwritten 
<= 0) break; 
2115         c
->sentlen 
+= nwritten
; 
2116         totwritten 
+= nwritten
; 
2117         /* If we fully sent the object on head go to the next one */ 
2118         if (c
->sentlen 
== objlen
) { 
2119             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2122         /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2123          * bytes: in a single-threaded server it's a good idea to serve
2124          * other clients as well, even if a very large request comes from a
2125          * super fast link that is always able to accept data (for a real-world
2126          * scenario think about 'KEYS *' against the loopback interface). */
2127         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
2129     if (nwritten 
== -1) { 
2130         if (errno 
== EAGAIN
) { 
2133             redisLog(REDIS_VERBOSE
, 
2134                 "Error writing to client: %s", strerror(errno
)); 
2139     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
2140     if (listLength(c
->reply
) == 0) { 
2142         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2146 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
2148     redisClient 
*c 
= privdata
; 
2149     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
2151     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
2152     int offset
, ion 
= 0; 
2154     REDIS_NOTUSED(mask
); 
2157     while (listLength(c
->reply
)) { 
2158         offset 
= c
->sentlen
; 
2162         /* fill-in the iov[] array */ 
2163         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
2164             o 
= listNodeValue(node
); 
2165             objlen 
= sdslen(o
->ptr
); 
2167             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
) 
2170             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
2171                 break; /* no more iovecs */ 
2173             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
2174             iov
[ion
].iov_len 
= objlen 
- offset
; 
2175             willwrite 
+= objlen 
- offset
; 
2176             offset 
= 0; /* just for the first item */ 
2183         /* write all collected blocks at once */ 
2184         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
2185             if (errno 
!= EAGAIN
) { 
2186                 redisLog(REDIS_VERBOSE
, 
2187                          "Error writing to client: %s", strerror(errno
)); 
2194         totwritten 
+= nwritten
; 
2195         offset 
= c
->sentlen
; 
2197         /* remove written robjs from c->reply */ 
2198         while (nwritten 
&& listLength(c
->reply
)) { 
2199             o 
= listNodeValue(listFirst(c
->reply
)); 
2200             objlen 
= sdslen(o
->ptr
); 
2202             if(nwritten 
>= objlen 
- offset
) { 
2203                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
2204                 nwritten 
-= objlen 
- offset
; 
2208                 c
->sentlen 
+= nwritten
; 
2216         c
->lastinteraction 
= time(NULL
); 
2218     if (listLength(c
->reply
) == 0) { 
2220         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2224 static struct redisCommand 
*lookupCommand(char *name
) { 
2226     while(cmdTable
[j
].name 
!= NULL
) { 
2227         if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
]; 
2233 /* resetClient prepares the client to process the next command */
2234 static void resetClient(redisClient 
*c
) { 
2240 /* Call() is the core of Redis execution of a command */ 
2241 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2244     dirty 
= server
.dirty
; 
2246     dirty 
= server
.dirty
-dirty
; 
2248     if (server
.appendonly 
&& dirty
) 
2249         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2250     if ((dirty 
|| cmd
->flags 
& REDIS_CMD_FORCE_REPLICATION
) && 
2251         listLength(server
.slaves
)) 
2252         replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
); 
2253     if (listLength(server
.monitors
)) 
2254         replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
); 
2255     server
.stat_numcommands
++; 
2258 /* If this function gets called we have already read a whole
2259  * command; its arguments are in the client argv/argc fields.
2260  * processCommand() executes the command or prepares the
2261  * server for a bulk read from the client.
2263  * If 1 is returned the client is still alive and valid, and
2264  * other operations can be performed by the caller. Otherwise,
2265  * if 0 is returned, the client was destroyed (i.e. after QUIT). */
2266 static int processCommand(redisClient 
*c
) { 
2267     struct redisCommand 
*cmd
; 
2269     /* Free some memory if needed (maxmemory setting) */ 
2270     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2272     /* Handle the multi bulk command type. This is an alternative protocol 
2273      * supported by Redis in order to receive commands that are composed of 
2274      * multiple binary-safe "bulk" arguments. The latency of processing is 
2275      * a bit higher but this allows things like multi-sets, so if this 
2276      * protocol is used only for MSET and similar commands this is a big win. */ 
2277     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2278         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2279         if (c
->multibulk 
<= 0) { 
2283             decrRefCount(c
->argv
[c
->argc
-1]); 
2287     } else if (c
->multibulk
) { 
2288         if (c
->bulklen 
== -1) { 
2289             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2290                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2294                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2295                 decrRefCount(c
->argv
[0]); 
2296                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2298                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2303                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2307             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2308             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2312             if (c
->multibulk 
== 0) { 
2316                 /* Here we need to swap the multi-bulk argc/argv with the 
2317                  * normal argc/argv of the client structure. */ 
2319                 c
->argv 
= c
->mbargv
; 
2320                 c
->mbargv 
= auxargv
; 
2323                 c
->argc 
= c
->mbargc
; 
2324                 c
->mbargc 
= auxargc
; 
2326                 /* We need to set bulklen to something different than -1 
2327                  * in order for the code below to process the command without 
2328                  * to try to read the last argument of a bulk command as 
2329                  * a special argument. */ 
2331                 /* continue below and process the command */ 
2338     /* -- end of multi bulk commands processing -- */ 
2340     /* The QUIT command is handled as a special case. Normal command 
2341      * procs are unable to close the client connection safely */ 
2342     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2347     /* Now look up the command and check ASAP for trivial error conditions
2348      * such as wrong arity, bad command name and so forth. */
2349     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2352             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2353                 (char*)c
->argv
[0]->ptr
)); 
2356     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2357                (c
->argc 
< -cmd
->arity
)) { 
2359             sdscatprintf(sdsempty(), 
2360                 "-ERR wrong number of arguments for '%s' command\r\n", 
2364     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2365         /* This is a bulk command, we have to read the last argument yet. */ 
2366         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2368         decrRefCount(c
->argv
[c
->argc
-1]); 
2369         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2371             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2376         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2377         /* It is possible that the bulk read is already in the
2378          * buffer. Check this condition and handle it accordingly.
2379          * This is just a fast path, an alternative to calling processInputBuffer().
2380          * It's a good idea since the code is small and this condition
2381          * happens most of the time. */
2382         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2383             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2385             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2387             /* Otherwise return... there is to read the last argument 
2388              * from the socket. */ 
2392     /* Let's try to encode the bulk object to save space. */ 
2393     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2394         c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2396     /* Check if the user is authenticated */ 
2397     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2398         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2403     /* Handle the maxmemory directive */ 
2404     if (server
.maxmemory 
&& (cmd
->flags 
& REDIS_CMD_DENYOOM
) && 
2405         zmalloc_used_memory() > server
.maxmemory
) 
2407         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2412     /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ 
2413     if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0) 
2415         cmd
->proc 
!= subscribeCommand 
&& cmd
->proc 
!= unsubscribeCommand 
&& 
2416         cmd
->proc 
!= psubscribeCommand 
&& cmd
->proc 
!= punsubscribeCommand
) { 
2417         addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n")); 
2422     /* Exec the command */ 
2423     if (c
->flags 
& REDIS_MULTI 
&& cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand
) { 
2424         queueMultiCommand(c
,cmd
); 
2425         addReply(c
,shared
.queued
); 
2427         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2428             blockClientOnSwappedKeys(c
,cmd
)) return 1; 
2432     /* Prepare the client for the next command */ 
2437 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
) { 
2442     /* We need 1+(ARGS*3) objects since commands are using the new protocol
2443      * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2444      * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2445     robj 
*static_outv
[REDIS_STATIC_ARGS
*3+1]; 
2448     if (argc 
<= REDIS_STATIC_ARGS
) { 
2451         outv 
= zmalloc(sizeof(robj
*)*(argc
*3+1)); 
2454     lenobj 
= createObject(REDIS_STRING
, 
2455             sdscatprintf(sdsempty(), "*%d\r\n", argc
)); 
2456     lenobj
->refcount 
= 0; 
2457     outv
[outc
++] = lenobj
; 
2458     for (j 
= 0; j 
< argc
; j
++) { 
2459         lenobj 
= createObject(REDIS_STRING
, 
2460             sdscatprintf(sdsempty(),"$%lu\r\n", 
2461                 (unsigned long) stringObjectLen(argv
[j
]))); 
2462         lenobj
->refcount 
= 0; 
2463         outv
[outc
++] = lenobj
; 
2464         outv
[outc
++] = argv
[j
]; 
2465         outv
[outc
++] = shared
.crlf
; 
2468     /* Increment all the refcounts at start and decrement at end in order to 
2469      * be sure to free objects if there is no slave in a replication state 
2470      * able to be feed with commands */ 
2471     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2472     listRewind(slaves
,&li
); 
2473     while((ln 
= listNext(&li
))) { 
2474         redisClient 
*slave 
= ln
->value
; 
2476         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2477         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2479         /* Feed all the other slaves, MONITORs and so on */ 
2480         if (slave
->slaveseldb 
!= dictid
) { 
2484             case 0: selectcmd 
= shared
.select0
; break; 
2485             case 1: selectcmd 
= shared
.select1
; break; 
2486             case 2: selectcmd 
= shared
.select2
; break; 
2487             case 3: selectcmd 
= shared
.select3
; break; 
2488             case 4: selectcmd 
= shared
.select4
; break; 
2489             case 5: selectcmd 
= shared
.select5
; break; 
2490             case 6: selectcmd 
= shared
.select6
; break; 
2491             case 7: selectcmd 
= shared
.select7
; break; 
2492             case 8: selectcmd 
= shared
.select8
; break; 
2493             case 9: selectcmd 
= shared
.select9
; break; 
2495                 selectcmd 
= createObject(REDIS_STRING
, 
2496                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2497                 selectcmd
->refcount 
= 0; 
2500             addReply(slave
,selectcmd
); 
2501             slave
->slaveseldb 
= dictid
; 
2503         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2505     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2506     if (outv 
!= static_outv
) zfree(outv
); 
2509 static sds 
sdscatrepr(sds s
, char *p
, size_t len
) { 
2510     s 
= sdscatlen(s
,"\"",1); 
2515             s 
= sdscatprintf(s
,"\\%c",*p
); 
2517         case '\n': s 
= sdscatlen(s
,"\\n",1); break; 
2518         case '\r': s 
= sdscatlen(s
,"\\r",1); break; 
2519         case '\t': s 
= sdscatlen(s
,"\\t",1); break; 
2520         case '\a': s 
= sdscatlen(s
,"\\a",1); break; 
2521         case '\b': s 
= sdscatlen(s
,"\\b",1); break; 
2524                 s 
= sdscatprintf(s
,"%c",*p
); 
2526                 s 
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
); 
2531     return sdscatlen(s
,"\"",1); 
2534 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
) { 
2538     sds cmdrepr 
= sdsnew("+"); 
2542     gettimeofday(&tv
,NULL
); 
2543     cmdrepr 
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
); 
2544     if (dictid 
!= 0) cmdrepr 
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
); 
2546     for (j 
= 0; j 
< argc
; j
++) { 
2547         if (argv
[j
]->encoding 
== REDIS_ENCODING_INT
) { 
2548             cmdrepr 
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
); 
2550             cmdrepr 
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
, 
2551                         sdslen(argv
[j
]->ptr
)); 
2554             cmdrepr 
= sdscatlen(cmdrepr
," ",1); 
2556     cmdrepr 
= sdscatlen(cmdrepr
,"\r\n",2); 
2557     cmdobj 
= createObject(REDIS_STRING
,cmdrepr
); 
2559     listRewind(monitors
,&li
); 
2560     while((ln 
= listNext(&li
))) { 
2561         redisClient 
*monitor 
= ln
->value
; 
2562         addReply(monitor
,cmdobj
); 
2564     decrRefCount(cmdobj
); 
2567 static void processInputBuffer(redisClient 
*c
) { 
2569     /* Before to process the input buffer, make sure the client is not 
2570      * waitig for a blocking operation such as BLPOP. Note that the first 
2571      * iteration the client is never blocked, otherwise the processInputBuffer 
2572      * would not be called at all, but after the execution of the first commands 
2573      * in the input buffer the client may be blocked, and the "goto again" 
2574      * will try to reiterate. The following line will make it return asap. */ 
2575     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2576     if (c
->bulklen 
== -1) { 
2577         /* Read the first line of the query */ 
2578         char *p 
= strchr(c
->querybuf
,'\n'); 
2585             query 
= c
->querybuf
; 
2586             c
->querybuf 
= sdsempty(); 
2587             querylen 
= 1+(p
-(query
)); 
2588             if (sdslen(query
) > querylen
) { 
2589                 /* leave data after the first line of the query in the buffer */ 
2590                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2592             *p 
= '\0'; /* remove "\n" */ 
2593             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2594             sdsupdatelen(query
); 
2596             /* Now we can split the query in arguments */ 
2597             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2600             if (c
->argv
) zfree(c
->argv
); 
2601             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2603             for (j 
= 0; j 
< argc
; j
++) { 
2604                 if (sdslen(argv
[j
])) { 
2605                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2613                 /* Execute the command. If the client is still valid 
2614                  * after processCommand() return and there is something 
2615                  * on the query buffer try to process the next command. */ 
2616                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2618                 /* Nothing to process, argc == 0. Just process the query 
2619                  * buffer if it's not empty or return to the caller */ 
2620                 if (sdslen(c
->querybuf
)) goto again
; 
2623         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2624             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2629         /* Bulk read handling. Note that if we are at this point 
2630            the client already sent a command terminated with a newline, 
2631            we are reading the bulk data that is actually the last 
2632            argument of the command. */ 
2633         int qbl 
= sdslen(c
->querybuf
); 
2635         if (c
->bulklen 
<= qbl
) { 
2636             /* Copy everything but the final CRLF as final argument */ 
2637             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2639             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2640             /* Process the command. If the client is still valid after 
2641              * the processing and there is more data in the buffer 
2642              * try to parse it. */ 
2643             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2649 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2650     redisClient 
*c 
= (redisClient
*) privdata
; 
2651     char buf
[REDIS_IOBUF_LEN
]; 
2654     REDIS_NOTUSED(mask
); 
2656     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2658         if (errno 
== EAGAIN
) { 
2661             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2665     } else if (nread 
== 0) { 
2666         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2671         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2672         c
->lastinteraction 
= time(NULL
); 
2676     processInputBuffer(c
); 
2679 static int selectDb(redisClient 
*c
, int id
) { 
2680     if (id 
< 0 || id 
>= server
.dbnum
) 
2682     c
->db 
= &server
.db
[id
]; 
2686 static void *dupClientReplyValue(void *o
) { 
2687     incrRefCount((robj
*)o
); 
2691 static int listMatchObjects(void *a
, void *b
) { 
2692     return equalStringObjects(a
,b
); 
2695 static redisClient 
*createClient(int fd
) { 
2696     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2698     anetNonBlock(NULL
,fd
); 
2699     anetTcpNoDelay(NULL
,fd
); 
2700     if (!c
) return NULL
; 
2703     c
->querybuf 
= sdsempty(); 
2712     c
->lastinteraction 
= time(NULL
); 
2713     c
->authenticated 
= 0; 
2714     c
->replstate 
= REDIS_REPL_NONE
; 
2715     c
->reply 
= listCreate(); 
2716     listSetFreeMethod(c
->reply
,decrRefCount
); 
2717     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2718     c
->blockingkeys 
= NULL
; 
2719     c
->blockingkeysnum 
= 0; 
2720     c
->io_keys 
= listCreate(); 
2721     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2722     c
->pubsub_channels 
= dictCreate(&setDictType
,NULL
); 
2723     c
->pubsub_patterns 
= listCreate(); 
2724     listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
); 
2725     listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
); 
2726     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2727         readQueryFromClient
, c
) == AE_ERR
) { 
2731     listAddNodeTail(server
.clients
,c
); 
2732     initClientMultiState(c
); 
2736 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2737     if (listLength(c
->reply
) == 0 && 
2738         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2739          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2740         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2741         sendReplyToClient
, c
) == AE_ERR
) return; 
2743     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2744         obj 
= dupStringObject(obj
); 
2745         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2747     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2750 static void addReplySds(redisClient 
*c
, sds s
) { 
2751     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2756 static void addReplyDouble(redisClient 
*c
, double d
) { 
2759     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2760     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2761         (unsigned long) strlen(buf
),buf
)); 
2764 static void addReplyLongLong(redisClient 
*c
, long long ll
) { 
2769         addReply(c
,shared
.czero
); 
2771     } else if (ll 
== 1) { 
2772         addReply(c
,shared
.cone
); 
2776     len 
= ll2string(buf
+1,sizeof(buf
)-1,ll
); 
2779     addReplySds(c
,sdsnewlen(buf
,len
+3)); 
2782 static void addReplyUlong(redisClient 
*c
, unsigned long ul
) { 
2787         addReply(c
,shared
.czero
); 
2789     } else if (ul 
== 1) { 
2790         addReply(c
,shared
.cone
); 
2793     len 
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
); 
2794     addReplySds(c
,sdsnewlen(buf
,len
)); 
2797 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2801     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2802         len 
= sdslen(obj
->ptr
); 
2804         long n 
= (long)obj
->ptr
; 
2806         /* Compute how many bytes will take this integer as a radix 10 string */ 
2812         while((n 
= n
/10) != 0) { 
2817     intlen 
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
); 
2818     buf
[intlen
+1] = '\r'; 
2819     buf
[intlen
+2] = '\n'; 
2820     addReplySds(c
,sdsnewlen(buf
,intlen
+3)); 
2823 static void addReplyBulk(redisClient 
*c
, robj 
*obj
) { 
2824     addReplyBulkLen(c
,obj
); 
2826     addReply(c
,shared
.crlf
); 
2829 /* In the CONFIG command we need to add vanilla C string as bulk replies */ 
2830 static void addReplyBulkCString(redisClient 
*c
, char *s
) { 
2832         addReply(c
,shared
.nullbulk
); 
2834         robj 
*o 
= createStringObject(s
,strlen(s
)); 
2840 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2845     REDIS_NOTUSED(mask
); 
2846     REDIS_NOTUSED(privdata
); 
2848     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2849     if (cfd 
== AE_ERR
) { 
2850         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2853     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2854     if ((c 
= createClient(cfd
)) == NULL
) { 
2855         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2856         close(cfd
); /* May be already closed, just ingore errors */ 
2859     /* If maxclient directive is set and this is one client more... close the 
2860      * connection. Note that we create the client instead to check before 
2861      * for this condition, since now the socket is already set in nonblocking 
2862      * mode and we can send an error for free using the Kernel I/O */ 
2863     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2864         char *err 
= "-ERR max number of clients reached\r\n"; 
2866         /* That's a best effort error message, don't check write errors */ 
2867         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2868             /* Nothing to do, Just to avoid the warning... */ 
2873     server
.stat_numconnections
++; 
2876 /* ======================= Redis objects implementation ===================== */ 
2878 static robj 
*createObject(int type
, void *ptr
) { 
2881     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2882     if (listLength(server
.objfreelist
)) { 
2883         listNode 
*head 
= listFirst(server
.objfreelist
); 
2884         o 
= listNodeValue(head
); 
2885         listDelNode(server
.objfreelist
,head
); 
2886         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2888         if (server
.vm_enabled
) { 
2889             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
2890             o 
= zmalloc(sizeof(*o
)); 
2892             o 
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
)); 
2896     o
->encoding 
= REDIS_ENCODING_RAW
; 
2899     if (server
.vm_enabled
) { 
2900         /* Note that this code may run in the context of an I/O thread 
2901          * and accessing to server.unixtime in theory is an error 
2902          * (no locks). But in practice this is safe, and even if we read 
2903          * garbage Redis will not fail, as it's just a statistical info */ 
2904         o
->vm
.atime 
= server
.unixtime
; 
2905         o
->storage 
= REDIS_VM_MEMORY
; 
2910 static robj 
*createStringObject(char *ptr
, size_t len
) { 
2911     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
2914 static robj 
*createStringObjectFromLongLong(long long value
) { 
2916     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
2917         incrRefCount(shared
.integers
[value
]); 
2918         o 
= shared
.integers
[value
]; 
2920         o 
= createObject(REDIS_STRING
, NULL
); 
2921         if (value 
>= LONG_MIN 
&& value 
<= LONG_MAX
) { 
2922             o
->encoding 
= REDIS_ENCODING_INT
; 
2923             o
->ptr 
= (void*)((long)value
); 
2925             o 
= createObject(REDIS_STRING
,sdsfromlonglong(value
)); 
2931 static robj 
*dupStringObject(robj 
*o
) { 
2932     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
2933     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
2936 static robj 
*createListObject(void) { 
2937     list 
*l 
= listCreate(); 
2939     listSetFreeMethod(l
,decrRefCount
); 
2940     return createObject(REDIS_LIST
,l
); 
2943 static robj 
*createSetObject(void) { 
2944     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
2945     return createObject(REDIS_SET
,d
); 
2948 static robj 
*createHashObject(void) { 
2949     /* All the Hashes start as zipmaps. Will be automatically converted 
2950      * into hash tables if there are enough elements or big elements 
2952     unsigned char *zm 
= zipmapNew(); 
2953     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
2954     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
2958 static robj 
*createZsetObject(void) { 
2959     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
2961     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
2962     zs
->zsl 
= zslCreate(); 
2963     return createObject(REDIS_ZSET
,zs
); 
2966 static void freeStringObject(robj 
*o
) { 
2967     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
2972 static void freeListObject(robj 
*o
) { 
2973     listRelease((list
*) o
->ptr
); 
2976 static void freeSetObject(robj 
*o
) { 
2977     dictRelease((dict
*) o
->ptr
); 
2980 static void freeZsetObject(robj 
*o
) { 
2983     dictRelease(zs
->dict
); 
2988 static void freeHashObject(robj 
*o
) { 
2989     switch (o
->encoding
) { 
2990     case REDIS_ENCODING_HT
: 
2991         dictRelease((dict
*) o
->ptr
); 
2993     case REDIS_ENCODING_ZIPMAP
: 
2997         redisPanic("Unknown hash encoding type"); 
3002 static void incrRefCount(robj 
*o
) { 
3006 static void decrRefCount(void *obj
) { 
3009     if (o
->refcount 
<= 0) redisPanic("decrRefCount against refcount <= 0"); 
3010     /* Object is a key of a swapped out value, or in the process of being 
3012     if (server
.vm_enabled 
&& 
3013         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
3015         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
); 
3016         redisAssert(o
->type 
== REDIS_STRING
); 
3017         freeStringObject(o
); 
3018         vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
); 
3019         pthread_mutex_lock(&server
.obj_freelist_mutex
); 
3020         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
3021             !listAddNodeHead(server
.objfreelist
,o
)) 
3023         pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3024         server
.vm_stats_swapped_objects
--; 
3027     /* Object is in memory, or in the process of being swapped out. */ 
3028     if (--(o
->refcount
) == 0) { 
3029         if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
3030             vmCancelThreadedIOJob(obj
); 
3032         case REDIS_STRING
: freeStringObject(o
); break; 
3033         case REDIS_LIST
: freeListObject(o
); break; 
3034         case REDIS_SET
: freeSetObject(o
); break; 
3035         case REDIS_ZSET
: freeZsetObject(o
); break; 
3036         case REDIS_HASH
: freeHashObject(o
); break; 
3037         default: redisPanic("Unknown object type"); break; 
3039         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
3040         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
3041             !listAddNodeHead(server
.objfreelist
,o
)) 
3043         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3047 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
3048     dictEntry 
*de 
= dictFind(db
->dict
,key
); 
3050         robj 
*key 
= dictGetEntryKey(de
); 
3051         robj 
*val 
= dictGetEntryVal(de
); 
3053         if (server
.vm_enabled
) { 
3054             if (key
->storage 
== REDIS_VM_MEMORY 
|| 
3055                 key
->storage 
== REDIS_VM_SWAPPING
) 
3057                 /* If we were swapping the object out, stop it, this key 
3059                 if (key
->storage 
== REDIS_VM_SWAPPING
) 
3060                     vmCancelThreadedIOJob(key
); 
3061                 /* Update the access time of the key for the aging algorithm. */ 
3062                 key
->vm
.atime 
= server
.unixtime
; 
3064                 int notify 
= (key
->storage 
== REDIS_VM_LOADING
); 
3066                 /* Our value was swapped on disk. Bring it at home. */ 
3067                 redisAssert(val 
== NULL
); 
3068                 val 
= vmLoadObject(key
); 
3069                 dictGetEntryVal(de
) = val
; 
3071                 /* Clients blocked by the VM subsystem may be waiting for 
3073                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
3082 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
3083     expireIfNeeded(db
,key
); 
3084     return lookupKey(db
,key
); 
3087 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
3088     deleteIfVolatile(db
,key
); 
3089     return lookupKey(db
,key
); 
3092 static robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3093     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
3094     if (!o
) addReply(c
,reply
); 
3098 static robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3099     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
3100     if (!o
) addReply(c
,reply
); 
3104 static int checkType(redisClient 
*c
, robj 
*o
, int type
) { 
3105     if (o
->type 
!= type
) { 
3106         addReply(c
,shared
.wrongtypeerr
); 
3112 static int deleteKey(redisDb 
*db
, robj 
*key
) { 
3115     /* We need to protect key from destruction: after the first dictDelete() 
3116      * it may happen that 'key' is no longer valid if we don't increment 
3117      * it's count. This may happen when we get the object reference directly 
3118      * from the hash table with dictRandomKey() or dict iterators */ 
3120     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
); 
3121     retval 
= dictDelete(db
->dict
,key
); 
3124     return retval 
== DICT_OK
; 
3127 /* Check if the nul-terminated string 's' can be represented by a long 
3128  * (that is, is a number that fits into long without any other space or 
3129  * character before or after the digits). 
3131  * If so, the function returns REDIS_OK and *longval is set to the value 
3132  * of the number. Otherwise REDIS_ERR is returned */ 
3133 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
3134     char buf
[32], *endptr
; 
3138     value 
= strtol(s
, &endptr
, 10); 
3139     if (endptr
[0] != '\0') return REDIS_ERR
; 
3140     slen 
= ll2string(buf
,32,value
); 
3142     /* If the number converted back into a string is not identical 
3143      * then it's not possible to encode the string as integer */ 
3144     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
3145     if (longval
) *longval 
= value
; 
3149 /* Try to encode a string object in order to save space */ 
3150 static robj 
*tryObjectEncoding(robj 
*o
) { 
3154     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
3155         return o
; /* Already encoded */ 
3157     /* It's not safe to encode shared objects: shared objects can be shared 
3158      * everywhere in the "object space" of Redis. Encoded objects can only 
3159      * appear as "values" (and not, for instance, as keys) */ 
3160      if (o
->refcount 
> 1) return o
; 
3162     /* Currently we try to encode only strings */ 
3163     redisAssert(o
->type 
== REDIS_STRING
); 
3165     /* Check if we can represent this string as a long integer */ 
3166     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
; 
3168     /* Ok, this object can be encoded */ 
3169     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3171         incrRefCount(shared
.integers
[value
]); 
3172         return shared
.integers
[value
]; 
3174         o
->encoding 
= REDIS_ENCODING_INT
; 
3176         o
->ptr 
= (void*) value
; 
3181 /* Get a decoded version of an encoded object (returned as a new object). 
3182  * If the object is already raw-encoded just increment the ref count. */ 
3183 static robj 
*getDecodedObject(robj 
*o
) { 
3186     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3190     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
3193         ll2string(buf
,32,(long)o
->ptr
); 
3194         dec 
= createStringObject(buf
,strlen(buf
)); 
3197         redisPanic("Unknown encoding type"); 
3201 /* Compare two string objects via strcmp() or alike. 
3202  * Note that the objects may be integer-encoded. In such a case we 
3203  * use ll2string() to get a string representation of the numbers on the stack 
3204  * and compare the strings, it's much faster than calling getDecodedObject(). 
3206  * Important note: if objects are not integer encoded, but binary-safe strings, 
3207  * sdscmp() from sds.c will apply memcmp() so this function ca be considered 
3209 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
3210     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
3211     char bufa
[128], bufb
[128], *astr
, *bstr
; 
3214     if (a 
== b
) return 0; 
3215     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
3216         ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
); 
3222     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
3223         ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
); 
3229     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
3232 /* Equal string objects return 1 if the two objects are the same from the 
3233  * point of view of a string comparison, otherwise 0 is returned. Note that 
3234  * this function is faster then checking for (compareStringObject(a,b) == 0) 
3235  * because it can perform some more optimization. */ 
3236 static int equalStringObjects(robj 
*a
, robj 
*b
) { 
3237     if (a
->encoding 
!= REDIS_ENCODING_RAW 
&& b
->encoding 
!= REDIS_ENCODING_RAW
){ 
3238         return a
->ptr 
== b
->ptr
; 
3240         return compareStringObjects(a
,b
) == 0; 
3244 static size_t stringObjectLen(robj 
*o
) { 
3245     redisAssert(o
->type 
== REDIS_STRING
); 
3246     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3247         return sdslen(o
->ptr
); 
3251         return ll2string(buf
,32,(long)o
->ptr
); 
3255 static int getDoubleFromObject(robj 
*o
, double *target
) { 
3262         redisAssert(o
->type 
== REDIS_STRING
); 
3263         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3264             value 
= strtod(o
->ptr
, &eptr
); 
3265             if (eptr
[0] != '\0') return REDIS_ERR
; 
3266         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3267             value 
= (long)o
->ptr
; 
3269             redisPanic("Unknown string encoding"); 
3277 static int getDoubleFromObjectOrReply(redisClient 
*c
, robj 
*o
, double *target
, const char *msg
) { 
3279     if (getDoubleFromObject(o
, &value
) != REDIS_OK
) { 
3281             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3283             addReplySds(c
, sdsnew("-ERR value is not a double\r\n")); 
3292 static int getLongLongFromObject(robj 
*o
, long long *target
) { 
3299         redisAssert(o
->type 
== REDIS_STRING
); 
3300         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3301             value 
= strtoll(o
->ptr
, &eptr
, 10); 
3302             if (eptr
[0] != '\0') return REDIS_ERR
; 
3303         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3304             value 
= (long)o
->ptr
; 
3306             redisPanic("Unknown string encoding"); 
3314 static int getLongLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long long *target
, const char *msg
) { 
3316     if (getLongLongFromObject(o
, &value
) != REDIS_OK
) { 
3318             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3320             addReplySds(c
, sdsnew("-ERR value is not an integer\r\n")); 
3329 static int getLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long *target
, const char *msg
) { 
3332     if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
; 
3333     if (value 
< LONG_MIN 
|| value 
> LONG_MAX
) { 
3335             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3337             addReplySds(c
, sdsnew("-ERR value is out of range\r\n")); 
3346 /*============================ RDB saving/loading =========================== */ 
/* Write the one byte 'type' to 'fp'.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 32 bit signed integer in host byte
 * order. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t) t;
    size_t written = fwrite(&seconds,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3359 /* check rdbLoadLen() comments for more info */ 
3360 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
3361     unsigned char buf
[2]; 
3364         /* Save a 6 bit len */ 
3365         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
3366         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3367     } else if (len 
< (1<<14)) { 
3368         /* Save a 14 bit len */ 
3369         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
3371         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
3373         /* Save a 32 bit len */ 
3374         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
3375         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3377         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
3382 /* Encode 'value' as an integer if possible (if integer will fit the 
3383  * supported range). If the function sucessful encoded the integer 
3384  * then the (up to 5 bytes) encoded representation is written in the 
3385  * string pointed by 'enc' and the length is returned. Otherwise 
3387 static int rdbEncodeInteger(long long value
, unsigned char *enc
) { 
3388     /* Finally check if it fits in our ranges */ 
3389     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
3390         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
3391         enc
[1] = value
&0xFF; 
3393     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
3394         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
3395         enc
[1] = value
&0xFF; 
3396         enc
[2] = (value
>>8)&0xFF; 
3398     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
3399         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
3400         enc
[1] = value
&0xFF; 
3401         enc
[2] = (value
>>8)&0xFF; 
3402         enc
[3] = (value
>>16)&0xFF; 
3403         enc
[4] = (value
>>24)&0xFF; 
/* String objects in the form "2391" or "-100", without any space and with
 * a value that fits in an 8, 16 or 32 bit signed integer, can be encoded
 * as integers to save space.
 *
 * 's' must be NUL-terminated and 'len' is its length. Returns the length
 * of the encoding written to 'enc', or 0 if the string cannot be encoded. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    char *endptr, canon[32];
    long long parsed;

    /* The whole string must parse as a number... */
    parsed = strtoll(s, &endptr, 10);
    if (*endptr != '\0') return 0;

    /* ...and printing the number back must give exactly the same bytes,
     * otherwise the original string could not be rebuilt on load. */
    ll2string(canon,sizeof(canon),parsed);
    if (strlen(canon) != len) return 0;
    if (memcmp(canon,s,len) != 0) return 0;

    return rdbEncodeInteger(parsed,enc);
}
3429 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) { 
3430     size_t comprlen
, outlen
; 
3434     /* We require at least four bytes compression for this to be worth it */ 
3435     if (len 
<= 4) return 0; 
3437     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
3438     comprlen 
= lzf_compress(s
, len
, out
, outlen
); 
3439     if (comprlen 
== 0) { 
3443     /* Data compressed! Let's save it on disk */ 
3444     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
3445     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
3446     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
3447     if (rdbSaveLen(fp
,len
) == -1) goto writeerr
; 
3448     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3457 /* Save a string objet as [len][data] on disk. If the object is a string 
3458  * representation of an integer value we try to safe it in a special form */ 
3459 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) { 
3462     /* Try integer encoding */ 
3464         unsigned char buf
[5]; 
3465         if ((enclen 
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) { 
3466             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3471     /* Try LZF compression - under 20 bytes it's unable to compress even 
3472      * aaaaaaaaaaaaaaaaaa so skip it */ 
3473     if (server
.rdbcompression 
&& len 
> 20) { 
3476         retval 
= rdbSaveLzfStringObject(fp
,s
,len
); 
3477         if (retval 
== -1) return -1; 
3478         if (retval 
> 0) return 0; 
3479         /* retval == 0 means data can't be compressed, save the old way */ 
3482     /* Store verbatim */ 
3483     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3484     if (len 
&& fwrite(s
,len
,1,fp
) == 0) return -1; 
3488 /* Like rdbSaveRawString() but handle encoded objects */ 
3489 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3492     /* Avoid decoding the object and then encoding it again if the 
3493      * object is already integer encoded. */ 
3494     if (obj
->encoding 
== REDIS_ENCODING_INT
) { 
3495         long val 
= (long) obj
->ptr
; 
3496         unsigned char buf
[5]; 
3499         if ((enclen 
= rdbEncodeInteger(val
,buf
)) > 0) { 
3500             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3503         /* otherwise... fall through and continue with the usual 
3507     /* Avoid incr/decr ref count business when possible. 
3508      * This plays well with copy-on-write given that we are probably 
3509      * in a child process (BGSAVE). Also this makes sure key objects 
3510      * of swapped objects are not incRefCount-ed (an assert does not allow 
3511      * this in order to avoid bugs) */ 
3512     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
3513         obj 
= getDecodedObject(obj
); 
3514         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
3517         retval 
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions (they are the same values rdbLoadDoubleValue() switches on):
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 if the write to 'fp' fails. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
        /* Check if the float is in a safe range to be casted into a
         * long long. We are assuming that long long is 64 bit here.
         * Also we are assuming that there are no implementations around where
         * double has precision < 52 bit.
         *
         * Under this assumptions we test if a double is inside an interval
         * where casting to long long is safe. Then using two castings we
         * make sure the decimal part is zero. If all this is true we use
         * integer printing function that is much faster. */
        double min = -4503599627370495; /* -((2^52)-1) */
        double max = 4503599627370496; /* 2^52 */
        if (val > min && val < max && val == ((double)((long long)val)))
            /* The string is written starting at buf+1, so only
             * sizeof(buf)-1 bytes are available to the formatter. */
            ll2string((char*)buf+1,sizeof(buf)-1,(long long)val);
        else
#endif
            snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        /* First byte is the length of the textual representation. */
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3565 /* Save a Redis object. */ 
3566 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3567     if (o
->type 
== REDIS_STRING
) { 
3568         /* Save a string value */ 
3569         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3570     } else if (o
->type 
== REDIS_LIST
) { 
3571         /* Save a list value */ 
3572         list 
*list 
= o
->ptr
; 
3576         if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3577         listRewind(list
,&li
); 
3578         while((ln 
= listNext(&li
))) { 
3579             robj 
*eleobj 
= listNodeValue(ln
); 
3581             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3583     } else if (o
->type 
== REDIS_SET
) { 
3584         /* Save a set value */ 
3586         dictIterator 
*di 
= dictGetIterator(set
); 
3589         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3590         while((de 
= dictNext(di
)) != NULL
) { 
3591             robj 
*eleobj 
= dictGetEntryKey(de
); 
3593             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3595         dictReleaseIterator(di
); 
3596     } else if (o
->type 
== REDIS_ZSET
) { 
3597         /* Save a sorted set value */ 
3599         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3602         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3603         while((de 
= dictNext(di
)) != NULL
) { 
3604             robj 
*eleobj 
= dictGetEntryKey(de
); 
3605             double *score 
= dictGetEntryVal(de
); 
3607             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3608             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3610         dictReleaseIterator(di
); 
3611     } else if (o
->type 
== REDIS_HASH
) { 
3612         /* Save a hash value */ 
3613         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3614             unsigned char *p 
= zipmapRewind(o
->ptr
); 
3615             unsigned int count 
= zipmapLen(o
->ptr
); 
3616             unsigned char *key
, *val
; 
3617             unsigned int klen
, vlen
; 
3619             if (rdbSaveLen(fp
,count
) == -1) return -1; 
3620             while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
3621                 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1; 
3622                 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1; 
3625             dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
3628             if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1; 
3629             while((de 
= dictNext(di
)) != NULL
) { 
3630                 robj 
*key 
= dictGetEntryKey(de
); 
3631                 robj 
*val 
= dictGetEntryVal(de
); 
3633                 if (rdbSaveStringObject(fp
,key
) == -1) return -1; 
3634                 if (rdbSaveStringObject(fp
,val
) == -1) return -1; 
3636             dictReleaseIterator(di
); 
3639         redisPanic("Unknown object type"); 
3644 /* Return the length the object will have on disk if saved with 
3645  * the rdbSaveObject() function. Currently we use a trick to get 
3646  * this length with very little changes to the code. In the future 
3647  * we could switch to a faster solution. */ 
3648 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3649     if (fp 
== NULL
) fp 
= server
.devnull
; 
3651     assert(rdbSaveObject(fp
,o
) != 1); 
3655 /* Return the number of pages required to save this object in the swap file */ 
3656 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3657     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3659     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3662 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3663 static int rdbSave(char *filename
) { 
3664     dictIterator 
*di 
= NULL
; 
3669     time_t now 
= time(NULL
); 
3671     /* Wait for I/O threads to terminate, just in case this is a 
3672      * foreground-saving, to avoid seeking the swap file descriptor at the 
3674     if (server
.vm_enabled
) 
3675         waitEmptyIOJobsQueue(); 
3677     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3678     fp 
= fopen(tmpfile
,"w"); 
3680         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3683     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3684     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3685         redisDb 
*db 
= server
.db
+j
; 
3687         if (dictSize(d
) == 0) continue; 
3688         di 
= dictGetIterator(d
); 
3694         /* Write the SELECT DB opcode */ 
3695         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3696         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3698         /* Iterate this DB writing every entry */ 
3699         while((de 
= dictNext(di
)) != NULL
) { 
3700             robj 
*key 
= dictGetEntryKey(de
); 
3701             robj 
*o 
= dictGetEntryVal(de
); 
3702             time_t expiretime 
= getExpire(db
,key
); 
3704             /* Save the expire time */ 
3705             if (expiretime 
!= -1) { 
3706                 /* If this key is already expired skip it */ 
3707                 if (expiretime 
< now
) continue; 
3708                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3709                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3711             /* Save the key and associated value. This requires special 
3712              * handling if the value is swapped out. */ 
3713             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
3714                                       key
->storage 
== REDIS_VM_SWAPPING
) { 
3715                 /* Save type, key, value */ 
3716                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3717                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3718                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3720                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3722                 /* Get a preview of the object in memory */ 
3723                 po 
= vmPreviewObject(key
); 
3724                 /* Save type, key, value */ 
3725                 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
; 
3726                 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
; 
3727                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3728                 /* Remove the loaded object from memory */ 
3732         dictReleaseIterator(di
); 
3735     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3737     /* Make sure data will not remain on the OS's output buffers */ 
3742     /* Use RENAME to make sure the DB file is changed atomically only 
3743      * if the generate DB file is ok. */ 
3744     if (rename(tmpfile
,filename
) == -1) { 
3745         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3749     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3751     server
.lastsave 
= time(NULL
); 
3757     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3758     if (di
) dictReleaseIterator(di
); 
3762 static int rdbSaveBackground(char *filename
) { 
3765     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3766     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3767     if ((childpid 
= fork()) == 0) { 
3769         if (server
.vm_enabled
) vmReopenSwapFile(); 
3771         if (rdbSave(filename
) == REDIS_OK
) { 
3778         if (childpid 
== -1) { 
3779             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3783         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3784         server
.bgsavechildpid 
= childpid
; 
3785         updateDictResizePolicy(); 
3788     return REDIS_OK
; /* unreached */ 
3791 static void rdbRemoveTempFile(pid_t childpid
) { 
3794     snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
); 
3798 static int rdbLoadType(FILE *fp
) { 
3800     if (fread(&type
,1,1,fp
) == 0) return -1; 
3804 static time_t rdbLoadTime(FILE *fp
) { 
3806     if (fread(&t32
,4,1,fp
) == 0) return -1; 
3807     return (time_t) t32
; 
3810 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
3811  * of this file for a description of how these are stored on disk. 
3813  * isencoded is set to 1 if the value read is not actually a length but 
3814  * an "encoding type", check the above comments for more info */ 
3815 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
3816     unsigned char buf
[2]; 
3820     if (isencoded
) *isencoded 
= 0; 
3821     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3822     type 
= (buf
[0]&0xC0)>>6; 
3823     if (type 
== REDIS_RDB_6BITLEN
) { 
3824         /* Read a 6 bit len */ 
3826     } else if (type 
== REDIS_RDB_ENCVAL
) { 
3827         /* Read a 6 bit len encoding type */ 
3828         if (isencoded
) *isencoded 
= 1; 
3830     } else if (type 
== REDIS_RDB_14BITLEN
) { 
3831         /* Read a 14 bit len */ 
3832         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3833         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
3835         /* Read a 32 bit len */ 
3836         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
3841 /* Load an integer-encoded object from file 'fp', with the specified 
3842  * encoding type 'enctype'. If encode is true the function may return 
3843  * an integer-encoded object as reply, otherwise the returned object 
3844  * will always be encoded as a raw string. */ 
3845 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) { 
3846     unsigned char enc
[4]; 
3849     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
3850         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
3851         val 
= (signed char)enc
[0]; 
3852     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
3854         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
3855         v 
= enc
[0]|(enc
[1]<<8); 
3857     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
3859         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
3860         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
3863         val 
= 0; /* anti-warning */ 
3864         redisPanic("Unknown RDB integer encoding type"); 
3867         return createStringObjectFromLongLong(val
); 
3869         return createObject(REDIS_STRING
,sdsfromlonglong(val
)); 
3872 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
3873     unsigned int len
, clen
; 
3874     unsigned char *c 
= NULL
; 
3877     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3878     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3879     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
3880     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
3881     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
3882     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
3884     return createObject(REDIS_STRING
,val
); 
3891 static robj 
*rdbGenericLoadStringObject(FILE*fp
, int encode
) { 
3896     len 
= rdbLoadLen(fp
,&isencoded
); 
3899         case REDIS_RDB_ENC_INT8
: 
3900         case REDIS_RDB_ENC_INT16
: 
3901         case REDIS_RDB_ENC_INT32
: 
3902             return rdbLoadIntegerObject(fp
,len
,encode
); 
3903         case REDIS_RDB_ENC_LZF
: 
3904             return rdbLoadLzfStringObject(fp
); 
3906             redisPanic("Unknown RDB encoding type"); 
3910     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
3911     val 
= sdsnewlen(NULL
,len
); 
3912     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
3916     return createObject(REDIS_STRING
,val
); 
3919 static robj 
*rdbLoadStringObject(FILE *fp
) { 
3920     return rdbGenericLoadStringObject(fp
,0); 
3923 static robj 
*rdbLoadEncodedStringObject(FILE *fp
) { 
3924     return rdbGenericLoadStringObject(fp
,1); 
3927 /* For information about double serialization check rdbSaveDoubleValue() */ 
3928 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
3932     if (fread(&len
,1,1,fp
) == 0) return -1; 
3934     case 255: *val 
= R_NegInf
; return 0; 
3935     case 254: *val 
= R_PosInf
; return 0; 
3936     case 253: *val 
= R_Nan
; return 0; 
3938         if (fread(buf
,len
,1,fp
) == 0) return -1; 
3940         sscanf(buf
, "%lg", val
); 
3945 /* Load a Redis object of the specified type from the specified file. 
3946  * On success a newly allocated object is returned, otherwise NULL. */ 
3947 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
3950     redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
)); 
3951     if (type 
== REDIS_STRING
) { 
3952         /* Read string value */ 
3953         if ((o 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
3954         o 
= tryObjectEncoding(o
); 
3955     } else if (type 
== REDIS_LIST 
|| type 
== REDIS_SET
) { 
3956         /* Read list/set value */ 
3959         if ((listlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3960         o 
= (type 
== REDIS_LIST
) ? createListObject() : createSetObject(); 
3961         /* It's faster to expand the dict to the right size asap in order 
3962          * to avoid rehashing */ 
3963         if (type 
== REDIS_SET 
&& listlen 
> DICT_HT_INITIAL_SIZE
) 
3964             dictExpand(o
->ptr
,listlen
); 
3965         /* Load every single element of the list/set */ 
3969             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
3970             ele 
= tryObjectEncoding(ele
); 
3971             if (type 
== REDIS_LIST
) { 
3972                 listAddNodeTail((list
*)o
->ptr
,ele
); 
3974                 dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
3977     } else if (type 
== REDIS_ZSET
) { 
3978         /* Read list/set value */ 
3982         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
3983         o 
= createZsetObject(); 
3985         /* Load every single element of the list/set */ 
3988             double *score 
= zmalloc(sizeof(double)); 
3990             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
3991             ele 
= tryObjectEncoding(ele
); 
3992             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
3993             dictAdd(zs
->dict
,ele
,score
); 
3994             zslInsert(zs
->zsl
,*score
,ele
); 
3995             incrRefCount(ele
); /* added to skiplist */ 
3997     } else if (type 
== REDIS_HASH
) { 
4000         if ((hashlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4001         o 
= createHashObject(); 
4002         /* Too many entries? Use an hash table. */ 
4003         if (hashlen 
> server
.hash_max_zipmap_entries
) 
4004             convertToRealHash(o
); 
4005         /* Load every key/value, then set it into the zipmap or hash 
4006          * table, as needed. */ 
4010             if ((key 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
4011             if ((val 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
4012             /* If we are using a zipmap and there are too big values 
4013              * the object is converted to real hash table encoding. */ 
4014             if (o
->encoding 
!= REDIS_ENCODING_HT 
&& 
4015                (sdslen(key
->ptr
) > server
.hash_max_zipmap_value 
|| 
4016                 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)) 
4018                     convertToRealHash(o
); 
4021             if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
4022                 unsigned char *zm 
= o
->ptr
; 
4024                 zm 
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
), 
4025                                   val
->ptr
,sdslen(val
->ptr
),NULL
); 
4030                 key 
= tryObjectEncoding(key
); 
4031                 val 
= tryObjectEncoding(val
); 
4032                 dictAdd((dict
*)o
->ptr
,key
,val
); 
4036         redisPanic("Unknown object type"); 
4041 static int rdbLoad(char *filename
) { 
4044     int type
, retval
, rdbver
; 
4045     int swap_all_values 
= 0; 
4046     dict 
*d 
= server
.db
[0].dict
; 
4047     redisDb 
*db 
= server
.db
+0; 
4049     time_t expiretime
, now 
= time(NULL
); 
4050     long long loadedkeys 
= 0; 
4052     fp 
= fopen(filename
,"r"); 
4053     if (!fp
) return REDIS_ERR
; 
4054     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
4056     if (memcmp(buf
,"REDIS",5) != 0) { 
4058         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
4061     rdbver 
= atoi(buf
+5); 
4064         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
4072         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4073         if (type 
== REDIS_EXPIRETIME
) { 
4074             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
4075             /* We read the time so we need to read the object type again */ 
4076             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4078         if (type 
== REDIS_EOF
) break; 
4079         /* Handle SELECT DB opcode as a special case */ 
4080         if (type 
== REDIS_SELECTDB
) { 
4081             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
4083             if (dbid 
>= (unsigned)server
.dbnum
) { 
4084                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
4087             db 
= server
.db
+dbid
; 
4092         if ((key 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
4094         if ((val 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
4095         /* Check if the key already expired */ 
4096         if (expiretime 
!= -1 && expiretime 
< now
) { 
4101         /* Add the new object in the hash table */ 
4102         retval 
= dictAdd(d
,key
,val
); 
4103         if (retval 
== DICT_ERR
) { 
4104             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
); 
4108         /* Set the expire time if needed */ 
4109         if (expiretime 
!= -1) setExpire(db
,key
,expiretime
); 
4111         /* Handle swapping while loading big datasets when VM is on */ 
4113         /* If we detect that we are hopeless about fitting something in memory 
4114          * we just swap every new key on disk. Directly... 
4115          * Note that's important to check for this condition before resorting 
4116          * to random sampling, otherwise we may try to swap already 
4118         if (swap_all_values
) { 
4119             dictEntry 
*de 
= dictFind(d
,key
); 
4121             /* de may be NULL since the key already expired */ 
4123                 key 
= dictGetEntryKey(de
); 
4124                 val 
= dictGetEntryVal(de
); 
4126                 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
4127                     dictGetEntryVal(de
) = NULL
; 
4133         /* If we have still some hope of having some value fitting memory 
4134          * then we try random sampling. */ 
4135         if (!swap_all_values 
&& server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
4136             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
4137                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
4139             if (zmalloc_used_memory() > server
.vm_max_memory
) 
4140                 swap_all_values 
= 1; /* We are already using too much mem */ 
4146 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
4147     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
4149     return REDIS_ERR
; /* Just to avoid warning */ 
4152 /*================================== Commands =============================== */ 
4154 static void authCommand(redisClient 
*c
) { 
4155     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
4156       c
->authenticated 
= 1; 
4157       addReply(c
,shared
.ok
); 
4159       c
->authenticated 
= 0; 
4160       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
4164 static void pingCommand(redisClient 
*c
) { 
4165     addReply(c
,shared
.pong
); 
4168 static void echoCommand(redisClient 
*c
) { 
4169     addReplyBulk(c
,c
->argv
[1]); 
4172 /*=================================== Strings =============================== */ 
4174 static void setGenericCommand(redisClient 
*c
, int nx
, robj 
*key
, robj 
*val
, robj 
*expire
) { 
4176     long seconds 
= 0; /* initialized to avoid an harmness warning */ 
4179         if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
) 
4182             addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n")); 
4187     if (nx
) deleteIfVolatile(c
->db
,key
); 
4188     retval 
= dictAdd(c
->db
->dict
,key
,val
); 
4189     if (retval 
== DICT_ERR
) { 
4191             /* If the key is about a swapped value, we want a new key object 
4192              * to overwrite the old. So we delete the old key in the database. 
4193              * This will also make sure that swap pages about the old object 
4194              * will be marked as free. */ 
4195             if (server
.vm_enabled 
&& deleteIfSwapped(c
->db
,key
)) 
4197             dictReplace(c
->db
->dict
,key
,val
); 
4200             addReply(c
,shared
.czero
); 
4208     removeExpire(c
->db
,key
); 
4209     if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
); 
4210     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4213 static void setCommand(redisClient 
*c
) { 
4214     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
); 
4217 static void setnxCommand(redisClient 
*c
) { 
4218     setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
); 
4221 static void setexCommand(redisClient 
*c
) { 
4222     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]); 
4225 static int getGenericCommand(redisClient 
*c
) { 
4228     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
) 
4231     if (o
->type 
!= REDIS_STRING
) { 
4232         addReply(c
,shared
.wrongtypeerr
); 
4240 static void getCommand(redisClient 
*c
) { 
4241     getGenericCommand(c
); 
4244 static void getsetCommand(redisClient 
*c
) { 
4245     if (getGenericCommand(c
) == REDIS_ERR
) return; 
4246     if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) { 
4247         dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
4249         incrRefCount(c
->argv
[1]); 
4251     incrRefCount(c
->argv
[2]); 
4253     removeExpire(c
->db
,c
->argv
[1]); 
4256 static void mgetCommand(redisClient 
*c
) { 
4259     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
4260     for (j 
= 1; j 
< c
->argc
; j
++) { 
4261         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
4263             addReply(c
,shared
.nullbulk
); 
4265             if (o
->type 
!= REDIS_STRING
) { 
4266                 addReply(c
,shared
.nullbulk
); 
4274 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
4275     int j
, busykeys 
= 0; 
4277     if ((c
->argc 
% 2) == 0) { 
4278         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
4281     /* Handle the NX flag. The MSETNX semantic is to return zero and not 
4282      * set anything at all if at least one key already exists. */ 
4284         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4285             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
4291         addReply(c
, shared
.czero
); 
4295     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4298         c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]); 
4299         retval 
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
4300         if (retval 
== DICT_ERR
) { 
4301             dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]); 
4302             incrRefCount(c
->argv
[j
+1]); 
4304             incrRefCount(c
->argv
[j
]); 
4305             incrRefCount(c
->argv
[j
+1]); 
4307         removeExpire(c
->db
,c
->argv
[j
]); 
4309     server
.dirty 
+= (c
->argc
-1)/2; 
4310     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4313 static void msetCommand(redisClient 
*c
) { 
4314     msetGenericCommand(c
,0); 
4317 static void msetnxCommand(redisClient 
*c
) { 
4318     msetGenericCommand(c
,1); 
4321 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
4326     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4327     if (o 
!= NULL 
&& checkType(c
,o
,REDIS_STRING
)) return; 
4328     if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return; 
4331     o 
= createStringObjectFromLongLong(value
); 
4332     retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],o
); 
4333     if (retval 
== DICT_ERR
) { 
4334         dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
4335         removeExpire(c
->db
,c
->argv
[1]); 
4337         incrRefCount(c
->argv
[1]); 
4340     addReply(c
,shared
.colon
); 
4342     addReply(c
,shared
.crlf
); 
4345 static void incrCommand(redisClient 
*c
) { 
4346     incrDecrCommand(c
,1); 
4349 static void decrCommand(redisClient 
*c
) { 
4350     incrDecrCommand(c
,-1); 
4353 static void incrbyCommand(redisClient 
*c
) { 
4356     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4357     incrDecrCommand(c
,incr
); 
4360 static void decrbyCommand(redisClient 
*c
) { 
4363     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4364     incrDecrCommand(c
,-incr
); 
4367 static void appendCommand(redisClient 
*c
) { 
4372     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4374         /* Create the key */ 
4375         retval 
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]); 
4376         incrRefCount(c
->argv
[1]); 
4377         incrRefCount(c
->argv
[2]); 
4378         totlen 
= stringObjectLen(c
->argv
[2]); 
4382         de 
= dictFind(c
->db
->dict
,c
->argv
[1]); 
4385         o 
= dictGetEntryVal(de
); 
4386         if (o
->type 
!= REDIS_STRING
) { 
4387             addReply(c
,shared
.wrongtypeerr
); 
4390         /* If the object is specially encoded or shared we have to make 
4392         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
4393             robj 
*decoded 
= getDecodedObject(o
); 
4395             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
4396             decrRefCount(decoded
); 
4397             dictReplace(c
->db
->dict
,c
->argv
[1],o
); 
4400         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
4401             o
->ptr 
= sdscatlen(o
->ptr
, 
4402                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
4404             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
4405                 (unsigned long) c
->argv
[2]->ptr
); 
4407         totlen 
= sdslen(o
->ptr
); 
4410     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
4413 static void substrCommand(redisClient 
*c
) { 
4415     long start 
= atoi(c
->argv
[2]->ptr
); 
4416     long end 
= atoi(c
->argv
[3]->ptr
); 
4417     size_t rangelen
, strlen
; 
4420     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4421         checkType(c
,o
,REDIS_STRING
)) return; 
4423     o 
= getDecodedObject(o
); 
4424     strlen 
= sdslen(o
->ptr
); 
4426     /* convert negative indexes */ 
4427     if (start 
< 0) start 
= strlen
+start
; 
4428     if (end 
< 0) end 
= strlen
+end
; 
4429     if (start 
< 0) start 
= 0; 
4430     if (end 
< 0) end 
= 0; 
4432     /* indexes sanity checks */ 
4433     if (start 
> end 
|| (size_t)start 
>= strlen
) { 
4434         /* Out of range start or start > end result in null reply */ 
4435         addReply(c
,shared
.nullbulk
); 
4439     if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
4440     rangelen 
= (end
-start
)+1; 
4442     /* Return the result */ 
4443     addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
)); 
4444     range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
4445     addReplySds(c
,range
); 
4446     addReply(c
,shared
.crlf
); 
4450 /* ========================= Type agnostic commands ========================= */ 
4452 static void delCommand(redisClient 
*c
) { 
4455     for (j 
= 1; j 
< c
->argc
; j
++) { 
4456         if (deleteKey(c
->db
,c
->argv
[j
])) { 
4461     addReplyLongLong(c
,deleted
); 
4464 static void existsCommand(redisClient 
*c
) { 
4465     expireIfNeeded(c
->db
,c
->argv
[1]); 
4466     if (dictFind(c
->db
->dict
,c
->argv
[1])) { 
4467         addReply(c
, shared
.cone
); 
4469         addReply(c
, shared
.czero
); 
4473 static void selectCommand(redisClient 
*c
) { 
4474     int id 
= atoi(c
->argv
[1]->ptr
); 
4476     if (selectDb(c
,id
) == REDIS_ERR
) { 
4477         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
4479         addReply(c
,shared
.ok
); 
4483 static void randomkeyCommand(redisClient 
*c
) { 
4488         de 
= dictGetRandomKey(c
->db
->dict
); 
4489         if (!de 
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break; 
4493         addReply(c
,shared
.nullbulk
); 
4497     key 
= dictGetEntryKey(de
); 
4498     if (server
.vm_enabled
) { 
4499         key 
= dupStringObject(key
); 
4500         addReplyBulk(c
,key
); 
4503         addReplyBulk(c
,key
); 
/* Iterate over every entry of the current DB's dictionary and send, as a
 * bulk reply, each key that matches the glob-style pattern in argv[1]. A key
 * matches when the pattern is exactly "*" or when stringmatchlen() accepts
 * it; matching keys are emitted only if expireIfNeeded() returns 0 for them.
 *
 * 'lenobj' is a string object created with a NULL payload; its ptr is filled
 * in at the very end with the "*<numkeys>\r\n" header, after the iteration
 * has finished. */
4507 static void keysCommand(redisClient 
*c
) { 
4510     sds pattern 
= c
->argv
[1]->ptr
; 
4511     int plen 
= sdslen(pattern
); 
4512     unsigned long numkeys 
= 0; 
4513     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
4515     di 
= dictGetIterator(c
->db
->dict
); 
4517     decrRefCount(lenobj
); 
4518     while((de 
= dictNext(di
)) != NULL
) { 
4519         robj 
*keyobj 
= dictGetEntryKey(de
); 
4521         sds key 
= keyobj
->ptr
; 
4522         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
4523             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
4524             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
4525                 addReplyBulk(c
,keyobj
); 
4530     dictReleaseIterator(di
); 
4531     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
/* Format dictSize() of the current DB's dictionary as an integer reply of
 * the form ":<n>\r\n". */
4534 static void dbsizeCommand(redisClient 
*c
) { 
4536         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
4539 static void lastsaveCommand(redisClient 
*c
) { 
4541         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
/* Look up argv[1] with lookupKeyRead() and reply with a status line naming
 * the kind of value found: "+string", "+list", "+set", "+zset", "+hash", or
 * "+unknown" for any other type constant. The status text is sent first and
 * the terminating CRLF is appended as a separate shared reply object. */
4544 static void typeCommand(redisClient 
*c
) { 
4548     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4553         case REDIS_STRING
: type 
= "+string"; break; 
4554         case REDIS_LIST
: type 
= "+list"; break; 
4555         case REDIS_SET
: type 
= "+set"; break; 
4556         case REDIS_ZSET
: type 
= "+zset"; break; 
4557         case REDIS_HASH
: type 
= "+hash"; break; 
4558         default: type 
= "+unknown"; break; 
4561     addReplySds(c
,sdsnew(type
)); 
4562     addReply(c
,shared
.crlf
); 
/* Synchronous save handler. When server.bgsavechildpid is not -1 an error
 * reply stating that a background save is in progress is sent. rdbSave() is
 * called on server.dbfilename; REDIS_OK yields shared.ok, and shared.err is
 * the reply on the other branch. */
4565 static void saveCommand(redisClient 
*c
) { 
4566     if (server
.bgsavechildpid 
!= -1) { 
4567         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
4570     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4571         addReply(c
,shared
.ok
); 
4573         addReply(c
,shared
.err
); 
/* Background save handler. When server.bgsavechildpid is not -1 an error
 * reply stating that a background save is already in progress is sent.
 * rdbSaveBackground() is called on server.dbfilename; REDIS_OK yields the
 * "+Background saving started" status line, and shared.err is the reply on
 * the other branch. */
4577 static void bgsaveCommand(redisClient 
*c
) { 
4578     if (server
.bgsavechildpid 
!= -1) { 
4579         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4582     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4583         char *status 
= "+Background saving started\r\n"; 
4584         addReplySds(c
,sdsnew(status
)); 
4586         addReply(c
,shared
.err
); 
/* Shutdown handler.
 *
 * 1. Logs the request; if a background-save child exists
 *    (server.bgsavechildpid != -1) it is sent SIGKILL and its temp file is
 *    removed with rdbRemoveTempFile().
 * 2. With server.appendonly set, the append-only file descriptor is
 *    fsync()ed and, when VM is enabled, the swap file is unlinked.
 * 3. On the snapshotting path rdbSave() is called; on REDIS_OK the pid file
 *    is unlinked when running daemonized and the exit messages are logged.
 *    If the save fails the server keeps running and the client receives
 *    "-ERR can't quit, problems saving the DB". */
4590 static void shutdownCommand(redisClient 
*c
) { 
4591     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4592     /* Kill the saving child if there is a background saving in progress. 
4593        We want to avoid race conditions, for instance our saving child may 
4594        overwrite the synchronous saving did by SHUTDOWN. */ 
4595     if (server
.bgsavechildpid 
!= -1) { 
4596         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4597         kill(server
.bgsavechildpid
,SIGKILL
); 
4598         rdbRemoveTempFile(server
.bgsavechildpid
); 
4600     if (server
.appendonly
) { 
4601         /* Append only file: fsync() the AOF and exit */ 
4602         fsync(server
.appendfd
); 
4603         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4606         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4607         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4608             if (server
.daemonize
) 
4609                 unlink(server
.pidfile
); 
4610             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4611             redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4614             /* Ooops.. error saving! The best we can do is to continue 
4615              * operating. Note that if there was a background saving process, 
4616              * in the next cron() Redis will be notified that the background 
4617              * saving aborted, handling special stuff like slaves pending for 
4618              * synchronization... */ 
4619             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit"); 
4621                 sdsnew("-ERR can't quit, problems saving the DB\r\n")); 
/* Shared body of the rename handlers; 'nx' selects the reply flavour and
 * (presumably -- confirm) whether an existing destination may be replaced.
 *
 * - Identical source and destination names (sdscmp() == 0) are answered
 *   with shared.sameobjecterr.
 * - A missing source key is answered with shared.nokeyerr through
 *   lookupKeyWriteOrReply().
 * - deleteIfVolatile() runs on the destination, then the source value is
 *   stored under the destination name with dictAdd(). If dictAdd() fails
 *   the code either replies shared.czero or falls back to dictReplace().
 * - The source key is removed with deleteKey() and the final reply is
 *   shared.cone when nx is set, shared.ok otherwise. */
4626 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4629     /* To use the same key as src and dst is probably an error */ 
4630     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4631         addReply(c
,shared
.sameobjecterr
); 
4635     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
4639     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4640     if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) { 
4643             addReply(c
,shared
.czero
); 
4646         dictReplace(c
->db
->dict
,c
->argv
[2],o
); 
4648         incrRefCount(c
->argv
[2]); 
4650     deleteKey(c
->db
,c
->argv
[1]); 
4652     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
4655 static void renameCommand(redisClient 
*c
) { 
4656     renameGenericCommand(c
,0); 
4659 static void renamenxCommand(redisClient 
*c
) { 
4660     renameGenericCommand(c
,1); 
/* Move the key argv[1] from the client's current DB to the DB whose index
 * is given in argv[2].
 *
 * The target DB is resolved by temporarily calling selectDb() with the
 * atoi()-parsed index (failure -> shared.outofrangeerr) and then switching
 * back to the source DB. Using the same DB as source and target is answered
 * with shared.sameobjecterr. The value is fetched with lookupKeyWrite() and
 * inserted in the target with dictAdd() after deleteIfVolatile() ran on the
 * target key; shared.czero is replied on the failure paths (a DICT_ERR from
 * dictAdd() is one of them). On success the key is removed from the source
 * DB and shared.cone is replied. */
4663 static void moveCommand(redisClient 
*c
) { 
4668     /* Obtain source and target DB pointers */ 
4671     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4672         addReply(c
,shared
.outofrangeerr
); 
4676     selectDb(c
,srcid
); /* Back to the source DB */ 
4678     /* If the user is moving using as target the same 
4679      * DB as the source DB it is probably an error. */ 
4681         addReply(c
,shared
.sameobjecterr
); 
4685     /* Check if the element exists and get a reference */ 
4686     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4688         addReply(c
,shared
.czero
); 
4692     /* Try to add the element to the target DB */ 
4693     deleteIfVolatile(dst
,c
->argv
[1]); 
4694     if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) { 
4695         addReply(c
,shared
.czero
); 
4698     incrRefCount(c
->argv
[1]); 
4701     /* OK! key moved, free the entry in the source DB */ 
4702     deleteKey(src
,c
->argv
[1]); 
4704     addReply(c
,shared
.cone
); 
4707 /* =================================== Lists ================================ */ 
/* Shared body of the list push handlers; 'where' is REDIS_HEAD or
 * REDIS_TAIL and selects listAddNodeHead() versus listAddNodeTail().
 *
 * Two paths are visible, one creating the list and one using an existing
 * value:
 * - New key: handleClientsWaitingListPush() is consulted first; when it
 *   returns non-zero the reply is shared.cone. Otherwise a list object is
 *   created, the element argv[2] is linked in, the object is stored under
 *   argv[1] and both argv[1] and argv[2] get an extra reference.
 * - Existing key: a non-list value yields shared.wrongtypeerr; the same
 *   handleClientsWaitingListPush() shortcut applies; otherwise the element
 *   is linked in and argv[2] gets an extra reference.
 *
 * The final reply is the list length. */
4708 static void pushGenericCommand(redisClient 
*c
, int where
) { 
4712     lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4714         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4715             addReply(c
,shared
.cone
); 
4718         lobj 
= createListObject(); 
4720         if (where 
== REDIS_HEAD
) { 
4721             listAddNodeHead(list
,c
->argv
[2]); 
4723             listAddNodeTail(list
,c
->argv
[2]); 
4725         dictAdd(c
->db
->dict
,c
->argv
[1],lobj
); 
4726         incrRefCount(c
->argv
[1]); 
4727         incrRefCount(c
->argv
[2]); 
4729         if (lobj
->type 
!= REDIS_LIST
) { 
4730             addReply(c
,shared
.wrongtypeerr
); 
4733         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
4734             addReply(c
,shared
.cone
); 
4738         if (where 
== REDIS_HEAD
) { 
4739             listAddNodeHead(list
,c
->argv
[2]); 
4741             listAddNodeTail(list
,c
->argv
[2]); 
4743         incrRefCount(c
->argv
[2]); 
4746     addReplyLongLong(c
,listLength(list
)); 
4749 static void lpushCommand(redisClient 
*c
) { 
4750     pushGenericCommand(c
,REDIS_HEAD
); 
4753 static void rpushCommand(redisClient 
*c
) { 
4754     pushGenericCommand(c
,REDIS_TAIL
); 
/* Reply with the length of the list stored at argv[1]. A missing key is
 * answered with shared.czero by lookupKeyReadOrReply(); checkType() guards
 * against values that are not REDIS_LIST. */
4757 static void llenCommand(redisClient 
*c
) { 
4761     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4762         checkType(c
,o
,REDIS_LIST
)) return; 
4765     addReplyUlong(c
,listLength(l
)); 
/* Reply with the element found by listIndex() at the position given in
 * argv[2] (parsed with atoi()) of the list stored at argv[1]. A missing key
 * is answered with shared.nullbulk; checkType() guards against non-list
 * values. shared.nullbulk is also replied on one of the branches after the
 * listIndex() call (presumably when no node is returned -- confirm);
 * otherwise the node value is sent as a bulk reply. */
4768 static void lindexCommand(redisClient 
*c
) { 
4770     int index 
= atoi(c
->argv
[2]->ptr
); 
4774     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4775         checkType(c
,o
,REDIS_LIST
)) return; 
4778     ln 
= listIndex(list
, index
); 
4780         addReply(c
,shared
.nullbulk
); 
4782         robj 
*ele 
= listNodeValue(ln
); 
4783         addReplyBulk(c
,ele
); 
/* Replace the element at the position given in argv[2] (parsed with atoi())
 * of the list stored at argv[1] with argv[3]. A missing key is answered
 * with shared.nokeyerr; checkType() guards against non-list values.
 * shared.outofrangeerr is replied on one branch after listIndex()
 * (presumably when no node is returned -- confirm). On the other branch the
 * node value is overwritten with argv[3], which gets an extra reference,
 * and shared.ok is replied. */
4787 static void lsetCommand(redisClient 
*c
) { 
4789     int index 
= atoi(c
->argv
[2]->ptr
); 
4793     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL 
|| 
4794         checkType(c
,o
,REDIS_LIST
)) return; 
4797     ln 
= listIndex(list
, index
); 
4799         addReply(c
,shared
.outofrangeerr
); 
4801         robj 
*ele 
= listNodeValue(ln
); 
4804         listNodeValue(ln
) = c
->argv
[3]; 
4805         incrRefCount(c
->argv
[3]); 
4806         addReply(c
,shared
.ok
); 
/* Shared body of the list pop handlers; 'where' equal to REDIS_HEAD picks
 * listFirst(), the other branch picks listLast(). A missing key is answered
 * with shared.nullbulk; checkType() guards against non-list values.
 * shared.nullbulk is also replied on one branch after the node is picked
 * (presumably when the list has no node -- confirm). Otherwise the node
 * value is sent as a bulk reply, the node is unlinked with listDelNode(),
 * and the key itself is deleted once the list length reaches 0. */
4811 static void popGenericCommand(redisClient 
*c
, int where
) { 
4816     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4817         checkType(c
,o
,REDIS_LIST
)) return; 
4820     if (where 
== REDIS_HEAD
) 
4821         ln 
= listFirst(list
); 
4823         ln 
= listLast(list
); 
4826         addReply(c
,shared
.nullbulk
); 
4828         robj 
*ele 
= listNodeValue(ln
); 
4829         addReplyBulk(c
,ele
); 
4830         listDelNode(list
,ln
); 
4831         if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4836 static void lpopCommand(redisClient 
*c
) { 
4837     popGenericCommand(c
,REDIS_HEAD
); 
4840 static void rpopCommand(redisClient 
*c
) { 
4841     popGenericCommand(c
,REDIS_TAIL
); 
/* Reply with the elements of the list at argv[1] between the inclusive
 * indexes argv[2] (start) and argv[3] (end), both parsed with atoi().
 *
 * Negative indexes are counted from the end of the list (llen is added) and
 * clamped to 0 if still negative. start > end or start >= llen produces
 * shared.emptymultibulk, as does a missing key. 'end' is clamped to llen-1.
 * The reply is a "*<rangelen>\r\n" header followed by one bulk reply per
 * element, starting from the node returned by listIndex(list,start). */
4844 static void lrangeCommand(redisClient 
*c
) { 
4846     int start 
= atoi(c
->argv
[2]->ptr
); 
4847     int end 
= atoi(c
->argv
[3]->ptr
); 
4854     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
4855          || checkType(c
,o
,REDIS_LIST
)) return; 
4857     llen 
= listLength(list
); 
4859     /* convert negative indexes */ 
4860     if (start 
< 0) start 
= llen
+start
; 
4861     if (end 
< 0) end 
= llen
+end
; 
4862     if (start 
< 0) start 
= 0; 
4863     if (end 
< 0) end 
= 0; 
4865     /* indexes sanity checks */ 
4866     if (start 
> end 
|| start 
>= llen
) { 
4867         /* Out of range start or start > end result in empty list */ 
4868         addReply(c
,shared
.emptymultibulk
); 
4871     if (end 
>= llen
) end 
= llen
-1; 
4872     rangelen 
= (end
-start
)+1; 
4874     /* Return the result in form of a multi-bulk reply */ 
4875     ln 
= listIndex(list
, start
); 
4876     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
4877     for (j 
= 0; j 
< rangelen
; j
++) { 
4878         ele 
= listNodeValue(ln
); 
4879         addReplyBulk(c
,ele
); 
/* Trim the list at argv[1] using the inclusive indexes argv[2] (start) and
 * argv[3] (end), both parsed with atoi().
 *
 * Negative indexes are counted from the end of the list (llen is added) and
 * clamped to 0 if still negative; 'end' is clamped to llen-1 on the in-range
 * path. 'ltrim' nodes are then removed from the head and 'rtrim' nodes from
 * the tail. The key is deleted once the list length reaches 0. The reply is
 * shared.ok, which is also what a missing key yields through
 * lookupKeyWriteOrReply(). */
4884 static void ltrimCommand(redisClient 
*c
) { 
4886     int start 
= atoi(c
->argv
[2]->ptr
); 
4887     int end 
= atoi(c
->argv
[3]->ptr
); 
4889     int j
, ltrim
, rtrim
; 
4893     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL 
|| 
4894         checkType(c
,o
,REDIS_LIST
)) return; 
4896     llen 
= listLength(list
); 
4898     /* convert negative indexes */ 
4899     if (start 
< 0) start 
= llen
+start
; 
4900     if (end 
< 0) end 
= llen
+end
; 
4901     if (start 
< 0) start 
= 0; 
4902     if (end 
< 0) end 
= 0; 
4904     /* indexes sanity checks */ 
4905     if (start 
> end 
|| start 
>= llen
) { 
4906         /* Out of range start or start > end result in empty list */ 
4910         if (end 
>= llen
) end 
= llen
-1; 
4915     /* Remove list elements to perform the trim */ 
4916     for (j 
= 0; j 
< ltrim
; j
++) { 
4917         ln 
= listFirst(list
); 
4918         listDelNode(list
,ln
); 
4920     for (j 
= 0; j 
< rtrim
; j
++) { 
4921         ln 
= listLast(list
); 
4922         listDelNode(list
,ln
); 
4924     if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4926     addReply(c
,shared
.ok
); 
/* Remove from the list at argv[1] the elements equal to argv[3]
 * (equalStringObjects()). argv[2] is parsed with atoi() into 'toremove';
 * on one path its sign is flipped, and 'fromtail' selects whether the scan
 * starts at list->tail and follows prev links or starts at list->head and
 * follows next links. A non-zero 'toremove' stops the scan as soon as
 * 'removed' reaches it. The key is deleted once the list length reaches 0
 * and the reply is the integer 'removed'. A missing key is answered with
 * shared.czero. */
4929 static void lremCommand(redisClient 
*c
) { 
4932     listNode 
*ln
, *next
; 
4933     int toremove 
= atoi(c
->argv
[2]->ptr
); 
4937     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
4938         checkType(c
,o
,REDIS_LIST
)) return; 
4942         toremove 
= -toremove
; 
4945     ln 
= fromtail 
? list
->tail 
: list
->head
; 
4947         robj 
*ele 
= listNodeValue(ln
); 
4949         next 
= fromtail 
? ln
->prev 
: ln
->next
; 
4950         if (equalStringObjects(ele
,c
->argv
[3])) { 
4951             listDelNode(list
,ln
); 
4954             if (toremove 
&& removed 
== toremove
) break; 
4958     if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
4959     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
4962 /* This is the semantic of this command: 
4963  *  RPOPLPUSH srclist dstlist: 
4964  *   IF LLEN(srclist) > 0 
4965  *     element = RPOP srclist 
4966  *     LPUSH dstlist element 
4973  * The idea is to be able to get an element from a list in a reliable way 
4974  * since the element is not just returned but pushed against another list 
4975  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
/* Take the last element of the list at argv[1] and push it on the head of
 * the list at argv[2], replying with the element as a bulk.
 *
 * A missing source key is answered with shared.nullbulk and checkType()
 * guards against non-list sources; shared.nullbulk is also replied on one
 * branch after listLast() (presumably when there is no last node --
 * confirm). A destination holding a non-list value yields
 * shared.wrongtypeerr. Unless handleClientsWaitingListPush() consumes the
 * element, it is added to the head of the destination list, which is
 * created and stored under argv[2] on one path. Finally the node is removed
 * from the source list and the source key is deleted once its length
 * reaches 0. */
4977 static void rpoplpushcommand(redisClient 
*c
) { 
4982     if ((sobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4983         checkType(c
,sobj
,REDIS_LIST
)) return; 
4984     srclist 
= sobj
->ptr
; 
4985     ln 
= listLast(srclist
); 
4988         addReply(c
,shared
.nullbulk
); 
4990         robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
4991         robj 
*ele 
= listNodeValue(ln
); 
4994         if (dobj 
&& dobj
->type 
!= REDIS_LIST
) { 
4995             addReply(c
,shared
.wrongtypeerr
); 
4999         /* Add the element to the target list (unless it's directly 
5000          * passed to some BLPOP-ing client */ 
5001         if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) { 
5003                 /* Create the list if the key does not exist */ 
5004                 dobj 
= createListObject(); 
5005                 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
); 
5006                 incrRefCount(c
->argv
[2]); 
5008             dstlist 
= dobj
->ptr
; 
5009             listAddNodeHead(dstlist
,ele
); 
5013         /* Send the element to the client as reply as well */ 
5014         addReplyBulk(c
,ele
); 
5016         /* Finally remove the element from the source list */ 
5017         listDelNode(srclist
,ln
); 
5018         if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5023 /* ==================================== Sets ================================ */ 
/* Add the member argv[2] to the set stored at argv[1].
 *
 * On one path a new set object is created and stored under argv[1] (which
 * gets an extra reference); on the other, a value whose type is not
 * REDIS_SET yields shared.wrongtypeerr. The member is inserted with
 * dictAdd() using a NULL value: DICT_OK takes an extra reference on argv[2]
 * and replies shared.cone, while shared.czero is the reply on the other
 * branch. */
5025 static void saddCommand(redisClient 
*c
) { 
5028     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5030         set 
= createSetObject(); 
5031         dictAdd(c
->db
->dict
,c
->argv
[1],set
); 
5032         incrRefCount(c
->argv
[1]); 
5034         if (set
->type 
!= REDIS_SET
) { 
5035             addReply(c
,shared
.wrongtypeerr
); 
5039     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
5040         incrRefCount(c
->argv
[2]); 
5042         addReply(c
,shared
.cone
); 
5044         addReply(c
,shared
.czero
); 
/* Remove the member argv[2] from the set stored at argv[1]. A missing key
 * is answered with shared.czero; checkType() guards against non-set values.
 * When dictDelete() returns DICT_OK the hash table is resized if
 * htNeedsResize() asks for it, the key is deleted once the set becomes
 * empty, and shared.cone is replied; shared.czero is the reply on the other
 * branch. */
5048 static void sremCommand(redisClient 
*c
) { 
5051     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5052         checkType(c
,set
,REDIS_SET
)) return; 
5054     if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
5056         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5057         if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5058         addReply(c
,shared
.cone
); 
5060         addReply(c
,shared
.czero
); 
/* Move the member argv[3] from the set at argv[1] to the set at argv[2].
 *
 * - Missing source key: shared.czero. Source of the wrong type:
 *   shared.wrongtypeerr. Existing destination of the wrong type:
 *   shared.wrongtypeerr.
 * - The member is removed from the source with dictDelete(); DICT_ERR
 *   (member not present) yields shared.czero.
 * - The source key is deleted when it became empty and is not the same
 *   object as the destination.
 * - A destination set is created and stored under argv[2] on one path; the
 *   member is then added and gets an extra reference when dictAdd() returns
 *   DICT_OK. The reply is shared.cone. */
5064 static void smoveCommand(redisClient 
*c
) { 
5065     robj 
*srcset
, *dstset
; 
5067     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5068     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
5070     /* If the source key does not exist return 0, if it's of the wrong type 
5072     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
5073         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
5076     /* Error if the destination key is not a set as well */ 
5077     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
5078         addReply(c
,shared
.wrongtypeerr
); 
5081     /* Remove the element from the source set */ 
5082     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
5083         /* Key not found in the src set! return zero */ 
5084         addReply(c
,shared
.czero
); 
5087     if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset 
!= dstset
) 
5088         deleteKey(c
->db
,c
->argv
[1]); 
5090     /* Add the element to the destination set */ 
5092         dstset 
= createSetObject(); 
5093         dictAdd(c
->db
->dict
,c
->argv
[2],dstset
); 
5094         incrRefCount(c
->argv
[2]); 
5096     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
5097         incrRefCount(c
->argv
[3]); 
5098     addReply(c
,shared
.cone
); 
5101 static void sismemberCommand(redisClient 
*c
) { 
5104     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5105         checkType(c
,set
,REDIS_SET
)) return; 
5107     if (dictFind(set
->ptr
,c
->argv
[2])) 
5108         addReply(c
,shared
.cone
); 
5110         addReply(c
,shared
.czero
); 
/* Reply with dictSize() of the set stored at argv[1]. A missing key is
 * answered with shared.czero by lookupKeyReadOrReply(); checkType() guards
 * against values that are not REDIS_SET. */
5113 static void scardCommand(redisClient 
*c
) { 
5117     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5118         checkType(c
,o
,REDIS_SET
)) return; 
5121     addReplyUlong(c
,dictSize(s
)); 
/* Remove a random member from the set stored at argv[1] and reply with it
 * as a bulk. A missing key is answered with shared.nullbulk; checkType()
 * guards against non-set values; shared.nullbulk is also replied on one
 * branch after dictGetRandomKey() (presumably when no entry is returned --
 * confirm). After the reply is queued the member is deleted from the set,
 * the table is resized if htNeedsResize() asks for it, and the key is
 * deleted once the set becomes empty. */
5124 static void spopCommand(redisClient 
*c
) { 
5128     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5129         checkType(c
,set
,REDIS_SET
)) return; 
5131     de 
= dictGetRandomKey(set
->ptr
); 
5133         addReply(c
,shared
.nullbulk
); 
5135         robj 
*ele 
= dictGetEntryKey(de
); 
5137         addReplyBulk(c
,ele
); 
5138         dictDelete(set
->ptr
,ele
); 
5139         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5140         if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
/* Reply with a random member of the set stored at argv[1] as a bulk. No
 * removal call is visible here. A missing key is answered with
 * shared.nullbulk; checkType() guards against non-set values;
 * shared.nullbulk is also replied on one branch after dictGetRandomKey()
 * (presumably when no entry is returned -- confirm). */
5145 static void srandmemberCommand(redisClient 
*c
) { 
5149     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5150         checkType(c
,set
,REDIS_SET
)) return; 
5152     de 
= dictGetRandomKey(set
->ptr
); 
5154         addReply(c
,shared
.nullbulk
); 
5156         robj 
*ele 
= dictGetEntryKey(de
); 
5158         addReplyBulk(c
,ele
); 
5162 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
5163     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
5165     return dictSize(*d1
)-dictSize(*d2
); 
/* Shared body of the set intersection handlers. 'setskeys' holds 'setsnum'
 * key names; 'dstkey' is NULL for the variant that replies with the members
 * and non-NULL for the variant that stores the result (sinterCommand() and
 * sinterstoreCommand() call it that way).
 *
 * 1. Every key is looked up (lookupKeyWrite() or lookupKeyRead()) and its
 *    dict pointer collected in 'dv'. Early-exit paths reply shared.czero or
 *    shared.emptymultibulk, and a value that is not REDIS_SET yields
 *    shared.wrongtypeerr.
 * 2. 'dv' is sorted by ascending cardinality so the smallest set drives the
 *    iteration.
 * 3. Each member of dv[0] is probed against dv[1..setsnum-1] with
 *    dictFind(); members missing from any set are skipped. Surviving
 *    members are either sent as bulk replies or added to 'dstset'.
 * 4. In store mode the old destination key is deleted and the result is
 *    stored only when non-empty, replying its size; an empty result
 *    releases 'dstset' and replies shared.czero. In reply mode 'lenobj'
 *    receives the "*<cardinality>\r\n" header at the end. */
5168 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
5169     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5172     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
5173     unsigned long j
, cardinality 
= 0; 
5175     for (j 
= 0; j 
< setsnum
; j
++) { 
5179                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5180                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5184                 if (deleteKey(c
->db
,dstkey
)) 
5186                 addReply(c
,shared
.czero
); 
5188                 addReply(c
,shared
.emptymultibulk
); 
5192         if (setobj
->type 
!= REDIS_SET
) { 
5194             addReply(c
,shared
.wrongtypeerr
); 
5197         dv
[j
] = setobj
->ptr
; 
5199     /* Sort sets from the smallest to largest, this will improve our 
5200      * algorithm's performance */ 
5201     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
5203     /* The first thing we should output is the total number of elements... 
5204      * since this is a multi-bulk write, but at this stage we don't know 
5205      * the intersection set size, so we use a trick, append an empty object 
5206      * to the output list and save the pointer to later modify it with the 
5209         lenobj 
= createObject(REDIS_STRING
,NULL
); 
5211         decrRefCount(lenobj
); 
5213         /* If we have a target key where to store the resulting set 
5214          * create this key with an empty set inside */ 
5215         dstset 
= createSetObject(); 
5218     /* Iterate all the elements of the first (smallest) set, and test 
5219      * the element against all the other sets, if at least one set does 
5220      * not include the element it is discarded */ 
5221     di 
= dictGetIterator(dv
[0]); 
5223     while((de 
= dictNext(di
)) != NULL
) { 
5226         for (j 
= 1; j 
< setsnum
; j
++) 
5227             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
5229             continue; /* at least one set does not contain the member */ 
5230         ele 
= dictGetEntryKey(de
); 
5232             addReplyBulk(c
,ele
); 
5235             dictAdd(dstset
->ptr
,ele
,NULL
); 
5239     dictReleaseIterator(di
); 
5242         /* Store the resulting set into the target, if the intersection 
5243          * is not an empty set. */ 
5244         deleteKey(c
->db
,dstkey
); 
5245         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5246             dictAdd(c
->db
->dict
,dstkey
,dstset
); 
5247             incrRefCount(dstkey
); 
5248             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5250             decrRefCount(dstset
); 
5251             addReply(c
,shared
.czero
); 
5255         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
5260 static void sinterCommand(redisClient 
*c
) { 
5261     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
5264 static void sinterstoreCommand(redisClient 
*c
) { 
5265     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
/* Operation selectors passed as the 'op' argument of
 * sunionDiffGenericCommand(). */
5268 #define REDIS_OP_UNION 0 
5269 #define REDIS_OP_DIFF 1 
5270 #define REDIS_OP_INTER 2 
/* Shared body of the set union / difference handlers. 'setskeys' holds
 * 'setsnum' key names, 'op' is REDIS_OP_UNION or REDIS_OP_DIFF, and
 * 'dstkey' is NULL for the variants that reply with the members and
 * non-NULL for the variants that store the result (see the four callers).
 *
 * 1. Every key is looked up and its dict pointer collected in 'dv'; a value
 *    that is not REDIS_SET yields shared.wrongtypeerr. NULL entries in 'dv'
 *    are later treated as empty sets.
 * 2. A temporary set object is filled: for a union, or for the first set of
 *    a difference, members are added with dictAdd(); for the following sets
 *    of a difference, members are removed with dictDelete(). A difference
 *    stops early when the first set is missing or when 'cardinality' drops
 *    to 0.
 * 3. In reply mode a "*<cardinality>\r\n" header and one bulk per member
 *    are sent and the temporary set is released. In store mode the old
 *    destination key is deleted and the result is stored only when
 *    non-empty, replying its size; an empty result releases the set and
 *    replies shared.czero. */
5272 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
5273     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5276     robj 
*dstset 
= NULL
; 
5277     int j
, cardinality 
= 0; 
5279     for (j 
= 0; j 
< setsnum
; j
++) { 
5283                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5284                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5289         if (setobj
->type 
!= REDIS_SET
) { 
5291             addReply(c
,shared
.wrongtypeerr
); 
5294         dv
[j
] = setobj
->ptr
; 
5297     /* We need a temp set object to store our union. If the dstkey 
5298      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
5299      * this set object will be the resulting object to set into the target key*/ 
5300     dstset 
= createSetObject(); 
5302     /* Iterate all the elements of all the sets, add every element a single 
5303      * time to the result set */ 
5304     for (j 
= 0; j 
< setsnum
; j
++) { 
5305         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
5306         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
5308         di 
= dictGetIterator(dv
[j
]); 
5310         while((de 
= dictNext(di
)) != NULL
) { 
5313             /* dictAdd will not add the same element multiple times */ 
5314             ele 
= dictGetEntryKey(de
); 
5315             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
5316                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
5320             } else if (op 
== REDIS_OP_DIFF
) { 
5321                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
5326         dictReleaseIterator(di
); 
5328         /* result set is empty? Exit asap. */ 
5329         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; 
5332     /* Output the content of the resulting set, if not in STORE mode */ 
5334         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
5335         di 
= dictGetIterator(dstset
->ptr
); 
5336         while((de 
= dictNext(di
)) != NULL
) { 
5339             ele 
= dictGetEntryKey(de
); 
5340             addReplyBulk(c
,ele
); 
5342         dictReleaseIterator(di
); 
5343         decrRefCount(dstset
); 
5345         /* If we have a target key where to store the resulting set 
5346          * create this key with the result set inside */ 
5347         deleteKey(c
->db
,dstkey
); 
5348         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5349             dictAdd(c
->db
->dict
,dstkey
,dstset
); 
5350             incrRefCount(dstkey
); 
5351             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5353             decrRefCount(dstset
); 
5354             addReply(c
,shared
.czero
); 
5361 static void sunionCommand(redisClient 
*c
) { 
5362     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
5365 static void sunionstoreCommand(redisClient 
*c
) { 
5366     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
5369 static void sdiffCommand(redisClient 
*c
) { 
5370     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
5373 static void sdiffstoreCommand(redisClient 
*c
) { 
5374     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
5377 /* ==================================== ZSets =============================== */ 
5379 /* ZSETs are ordered sets using two data structures to hold the same elements 
5380  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
5383  * The elements are added to an hash table mapping Redis objects to scores. 
5384  * At the same time the elements are added to a skip list mapping scores 
5385  * to Redis objects (so objects are sorted by scores in this "view"). */ 
5387 /* This skiplist implementation is almost a C translation of the original 
5388  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
5389  * Alternative to Balanced Trees", modified in three ways: 
5390  * a) this implementation allows for repeated values. 
5391  * b) the comparison is not just by key (our 'score') but by satellite data. 
5392  * c) there is a back pointer, so it's a doubly linked list with the back 
5393  * pointers being only at "level 1". This allows to traverse the list 
5394  * from tail to head, useful for ZREVRANGE. */ 
/* Allocate a skiplist node for the given 'level', 'score' and 'obj'. The
 * 'forward' array gets 'level' pointers; the 'span' array, allocated under
 * a condition on the level, gets level-1 unsigned ints (span[i-1] is the
 * span of level i, level 0 has none). */
5396 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
5397     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
5399     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
5401         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
/* Allocate a skiplist. The header node is created with ZSKIPLIST_MAXLEVEL
 * levels, score 0 and a NULL object; all its forward pointers are set to
 * NULL, all its ZSKIPLIST_MAXLEVEL-1 span slots to 0 and its backward
 * pointer to NULL. */
5407 static zskiplist 
*zslCreate(void) { 
5411     zsl 
= zmalloc(sizeof(*zsl
)); 
5414     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
5415     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
5416         zsl
->header
->forward
[j
] = NULL
; 
5418         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
5419         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
5420             zsl
->header
->span
[j
] = 0; 
5422     zsl
->header
->backward 
= NULL
; 
/* Release a skiplist node: drops the node's reference on its object with
 * decrRefCount() and frees the 'forward' array. */
5427 static void zslFreeNode(zskiplistNode 
*node
) { 
5428     decrRefCount(node
->obj
); 
5429     zfree(node
->forward
); 
/* Release a whole skiplist. The first real node is remembered before the
 * header's 'forward' and 'span' arrays are freed; the remaining nodes are
 * then visited through their level-0 forward pointers, with 'next' saved
 * before each node is handled. */
5434 static void zslFree(zskiplist 
*zsl
) { 
5435     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
5437     zfree(zsl
->header
->forward
); 
5438     zfree(zsl
->header
->span
); 
5441         next 
= node
->forward
[0]; 
/* Draw a random level for a new node. The loop keeps going while the low
 * 16 bits of random() fall below ZSKIPLIST_P * 0xFFFF; the returned level
 * is capped at ZSKIPLIST_MAXLEVEL. */
5448 static int zslRandomLevel(void) { 
5450     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
5452     return (level
<ZSKIPLIST_MAXLEVEL
) ? level 
: ZSKIPLIST_MAXLEVEL
; 
/* Insert a new node holding 'score' and 'obj' into the skiplist.
 *
 * Search phase: from the top level down, advance while the next node sorts
 * before the new one (lower score, or equal score and smaller object per
 * compareStringObjects()). rank[i] accumulates the number of nodes crossed
 * at level i, using span[i-1] for levels above 0 and 1 for level 0.
 *
 * Insert phase: a random level is drawn; levels above the current list
 * level use the header as predecessor with a span equal to the list length.
 * The new node is linked after update[i] on every level it takes part in,
 * and the spans of the predecessor and of the new node are recomputed from
 * rank[0] - rank[i]. Levels the new node does not reach have their
 * predecessor span incremented. The backward pointer is NULL when the
 * predecessor at level 0 is the header. */
5455 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5456     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5457     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
5461     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5462         /* store rank that is crossed to reach the insert position */ 
5463         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
5465         while (x
->forward
[i
] && 
5466             (x
->forward
[i
]->score 
< score 
|| 
5467                 (x
->forward
[i
]->score 
== score 
&& 
5468                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
5469             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
5474     /* we assume the key is not already inside, since we allow duplicated 
5475      * scores, and the re-insertion of score and redis object should never 
5476      * happen since the caller of zslInsert() should test in the hash table 
5477      * if the element is already inside or not. */ 
5478     level 
= zslRandomLevel(); 
5479     if (level 
> zsl
->level
) { 
5480         for (i 
= zsl
->level
; i 
< level
; i
++) { 
5482             update
[i
] = zsl
->header
; 
5483             update
[i
]->span
[i
-1] = zsl
->length
; 
5487     x 
= zslCreateNode(level
,score
,obj
); 
5488     for (i 
= 0; i 
< level
; i
++) { 
5489         x
->forward
[i
] = update
[i
]->forward
[i
]; 
5490         update
[i
]->forward
[i
] = x
; 
5492         /* update span covered by update[i] as x is inserted here */ 
5494             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
5495             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
5499     /* increment span for untouched levels */ 
5500     for (i 
= level
; i 
< zsl
->level
; i
++) { 
5501         update
[i
]->span
[i
-1]++; 
5504     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
5506         x
->forward
[0]->backward 
= x
; 
/* Unlink node 'x' from the skiplist. 'update' must hold, for every level,
 * the node preceding x as computed by the caller's search.
 *
 * On levels where update[i] points directly at x, the predecessor takes
 * over x's span (minus one for x itself) and x's forward pointer; on the
 * other levels the predecessor's span is just decremented. The backward
 * link of x's successor, or the list tail when x was last, is fixed up, and
 * the loop at the end runs while the top level of the header has no forward
 * node and the list level is above 1. */
5512 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ 
5513 void zslDeleteNode(zskiplist 
*zsl
, zskiplistNode 
*x
, zskiplistNode 
**update
) { 
5515     for (i 
= 0; i 
< zsl
->level
; i
++) { 
5516         if (update
[i
]->forward
[i
] == x
) { 
5518                 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5520             update
[i
]->forward
[i
] = x
->forward
[i
]; 
5522             /* invariant: i > 0, because update[0]->forward[0] 
5523              * is always equal to x */ 
5524             update
[i
]->span
[i
-1] -= 1; 
5527     if (x
->forward
[0]) { 
5528         x
->forward
[0]->backward 
= x
->backward
; 
5530         zsl
->tail 
= x
->backward
; 
5532     while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
/* The search walks every level from the top down, advancing while the next
 * node sorts before (score, obj): lower score, or equal score and smaller
 * object per compareStringObjects(). The candidate is removed with
 * zslDeleteNode() only when both its score and its object match; 0 is
 * returned when no matching node exists. */
5537 /* Delete an element with matching score/object from the skiplist. */ 
5538 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5539     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5543     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5544         while (x
->forward
[i
] && 
5545             (x
->forward
[i
]->score 
< score 
|| 
5546                 (x
->forward
[i
]->score 
== score 
&& 
5547                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
5551     /* We may have multiple elements with the same score, what we need 
5552      * is to find the element with both the right score and object. */ 
5554     if (x 
&& score 
== x
->score 
&& equalStringObjects(x
->obj
,obj
)) { 
5555         zslDeleteNode(zsl
, x
, update
); 
5559         return 0; /* not found */ 
5561     return 0; /* not found */ 
5564 /* Delete all the elements with score between min and max from the skiplist. 
5565  * Min and max are inclusive, so a node with score >= min && score <= max 
5566  * is deleted. Note that this function takes the reference to the hash table 
5567  * view of the sorted set, in order to remove the elements from the hash 
      * table too. The value of 'removed' is returned. */ 
5568 static unsigned long zslDeleteRangeByScore(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
5569     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5570     unsigned long removed 
= 0; 
5574     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5575         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
5579     /* The search above skips every node whose score is below min; from 
5580      * here on nodes are deleted one by one while their score is <= max. */ 
5582     while (x 
&& x
->score 
<= max
) { 
5583         zskiplistNode 
*next 
= x
->forward
[0]; 
5584         zslDeleteNode(zsl
, x
, update
); 
5585         dictDelete(dict
,x
->obj
); 
5590     return removed
; /* not found */ 
/* The search advances on each level while the accumulated rank ('traversed'
 * plus the span of the next hop, span[i-1] above level 0 and 1 at level 0)
 * stays below 'start'. Nodes are then deleted one by one, from both the
 * skiplist and 'dict', while 'traversed' is <= end. */
5593 /* Delete all the elements with rank between start and end from the skiplist. 
5594  * Start and end are inclusive. Note that start and end need to be 1-based */ 
5595 static unsigned long zslDeleteRangeByRank(zskiplist 
*zsl
, unsigned int start
, unsigned int end
, dict 
*dict
) { 
5596     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5597     unsigned long traversed 
= 0, removed 
= 0; 
5601     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5602         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) < start
) { 
5603             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5611     while (x 
&& traversed 
<= end
) { 
5612         zskiplistNode 
*next 
= x
->forward
[0]; 
5613         zslDeleteNode(zsl
, x
, update
); 
5614         dictDelete(dict
,x
->obj
); 
5623 /* Find the first node having a score equal or greater than the specified one. 
5624  * Returns NULL if there is no match. */ 
5625 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
5630     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5631         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
5634     /* The search above stops right before the first node whose score is 
5635      * >= 'score': return that node, or NULL when there is none. */ 
5636     return x
->forward
[0]; 
5639 /* Find the rank for an element by both score and key. 
5640  * Returns 0 when the element cannot be found, rank otherwise. 
5641  * Note that the rank is 1-based due to the span of zsl->header to the 
5643 static unsigned long zslGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
5645     unsigned long rank 
= 0; 
5649     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5650         while (x
->forward
[i
] && 
5651             (x
->forward
[i
]->score 
< score 
|| 
5652                 (x
->forward
[i
]->score 
== score 
&& 
5653                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
5654             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5658         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
5659         if (x
->obj 
&& equalStringObjects(x
->obj
,o
)) { 
5666 /* Finds an element by its rank. The rank argument needs to be 1-based. */ 
5667 zskiplistNode
* zslGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
5669     unsigned long traversed 
= 0; 
5673     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5674         while (x
->forward
[i
] && (traversed 
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
) 
5676             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
5679         if (traversed 
== rank
) { 
5686 /* The actual Z-commands implementations */ 
5688 /* This generic command implements both ZADD and ZINCRBY. 
5689  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
5690  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
5691 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
5696     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
5697     if (zsetobj 
== NULL
) { 
5698         zsetobj 
= createZsetObject(); 
5699         dictAdd(c
->db
->dict
,key
,zsetobj
); 
5702         if (zsetobj
->type 
!= REDIS_ZSET
) { 
5703             addReply(c
,shared
.wrongtypeerr
); 
5709     /* Ok now since we implement both ZADD and ZINCRBY here the code 
5710      * needs to handle the two different conditions. It's all about setting 
5711      * '*score', that is, the new score to set, to the right value. */ 
5712     score 
= zmalloc(sizeof(double)); 
5716         /* Read the old score. If the element was not present starts from 0 */ 
5717         de 
= dictFind(zs
->dict
,ele
); 
5719             double *oldscore 
= dictGetEntryVal(de
); 
5720             *score 
= *oldscore 
+ scoreval
; 
5728     /* What follows is a simple remove and re-insert operation that is common 
5729      * to both ZADD and ZINCRBY... */ 
5730     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
5731         /* case 1: New element */ 
5732         incrRefCount(ele
); /* added to hash */ 
5733         zslInsert(zs
->zsl
,*score
,ele
); 
5734         incrRefCount(ele
); /* added to skiplist */ 
5737             addReplyDouble(c
,*score
); 
5739             addReply(c
,shared
.cone
); 
5744         /* case 2: Score update operation */ 
5745         de 
= dictFind(zs
->dict
,ele
); 
5746         redisAssert(de 
!= NULL
); 
5747         oldscore 
= dictGetEntryVal(de
); 
5748         if (*score 
!= *oldscore
) { 
5751             /* Remove and insert the element in the skip list with new score */ 
5752             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
5753             redisAssert(deleted 
!= 0); 
5754             zslInsert(zs
->zsl
,*score
,ele
); 
5756             /* Update the score in the hash table */ 
5757             dictReplace(zs
->dict
,ele
,score
); 
5763             addReplyDouble(c
,*score
); 
5765             addReply(c
,shared
.czero
); 
5769 static void zaddCommand(redisClient 
*c
) { 
5772     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
5773     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
5776 static void zincrbyCommand(redisClient 
*c
) { 
5779     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
5780     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
5783 static void zremCommand(redisClient 
*c
) { 
5790     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5791         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5794     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
5796         addReply(c
,shared
.czero
); 
5799     /* Delete from the skiplist */ 
5800     oldscore 
= dictGetEntryVal(de
); 
5801     deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
5802     redisAssert(deleted 
!= 0); 
5804     /* Delete from the hash table */ 
5805     dictDelete(zs
->dict
,c
->argv
[2]); 
5806     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5807     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5809     addReply(c
,shared
.cone
); 
5812 static void zremrangebyscoreCommand(redisClient 
*c
) { 
5819     if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) || 
5820         (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return; 
5822     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5823         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5826     deleted 
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
); 
5827     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5828     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5829     server
.dirty 
+= deleted
; 
5830     addReplyLongLong(c
,deleted
); 
5833 static void zremrangebyrankCommand(redisClient 
*c
) { 
5841     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
5842         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
5844     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5845         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
5847     llen 
= zs
->zsl
->length
; 
5849     /* convert negative indexes */ 
5850     if (start 
< 0) start 
= llen
+start
; 
5851     if (end 
< 0) end 
= llen
+end
; 
5852     if (start 
< 0) start 
= 0; 
5853     if (end 
< 0) end 
= 0; 
5855     /* indexes sanity checks */ 
5856     if (start 
> end 
|| start 
>= llen
) { 
5857         addReply(c
,shared
.czero
); 
5860     if (end 
>= llen
) end 
= llen
-1; 
5862     /* increment start and end because zsl*Rank functions 
5863      * use 1-based rank */ 
5864     deleted 
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
); 
5865     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
5866     if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
5867     server
.dirty 
+= deleted
; 
5868     addReplyLongLong(c
, deleted
); 
5876 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) { 
5877     zsetopsrc 
*d1 
= (void*) s1
, *d2 
= (void*) s2
; 
5878     unsigned long size1
, size2
; 
5879     size1 
= d1
->dict 
? dictSize(d1
->dict
) : 0; 
5880     size2 
= d2
->dict 
? dictSize(d2
->dict
) : 0; 
5881     return size1 
- size2
; 
5884 #define REDIS_AGGR_SUM 1 
5885 #define REDIS_AGGR_MIN 2 
5886 #define REDIS_AGGR_MAX 3 
5888 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) { 
5889     if (aggregate 
== REDIS_AGGR_SUM
) { 
5890         *target 
= *target 
+ val
; 
5891     } else if (aggregate 
== REDIS_AGGR_MIN
) { 
5892         *target 
= val 
< *target 
? val 
: *target
; 
5893     } else if (aggregate 
== REDIS_AGGR_MAX
) { 
5894         *target 
= val 
> *target 
? val 
: *target
; 
5897         redisPanic("Unknown ZUNION/INTER aggregate type"); 
5901 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
5903     int aggregate 
= REDIS_AGGR_SUM
; 
5910     /* expect zsetnum input keys to be given */ 
5911     zsetnum 
= atoi(c
->argv
[2]->ptr
); 
5913         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n")); 
5917     /* test if the expected number of keys would overflow */ 
5918     if (3+zsetnum 
> c
->argc
) { 
5919         addReply(c
,shared
.syntaxerr
); 
5923     /* read keys to be used for input */ 
5924     src 
= zmalloc(sizeof(zsetopsrc
) * zsetnum
); 
5925     for (i 
= 0, j 
= 3; i 
< zsetnum
; i
++, j
++) { 
5926         robj 
*zsetobj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
5930             if (zsetobj
->type 
!= REDIS_ZSET
) { 
5932                 addReply(c
,shared
.wrongtypeerr
); 
5935             src
[i
].dict 
= ((zset
*)zsetobj
->ptr
)->dict
; 
5938         /* default all weights to 1 */ 
5939         src
[i
].weight 
= 1.0; 
5942     /* parse optional extra arguments */ 
5944         int remaining 
= c
->argc 
- j
; 
5947             if (remaining 
>= (zsetnum 
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
5949                 for (i 
= 0; i 
< zsetnum
; i
++, j
++, remaining
--) { 
5950                     if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
) 
5953             } else if (remaining 
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) { 
5955                 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) { 
5956                     aggregate 
= REDIS_AGGR_SUM
; 
5957                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) { 
5958                     aggregate 
= REDIS_AGGR_MIN
; 
5959                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) { 
5960                     aggregate 
= REDIS_AGGR_MAX
; 
5963                     addReply(c
,shared
.syntaxerr
); 
5969                 addReply(c
,shared
.syntaxerr
); 
5975     /* sort sets from the smallest to largest, this will improve our 
5976      * algorithm's performance */ 
5977     qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
); 
5979     dstobj 
= createZsetObject(); 
5980     dstzset 
= dstobj
->ptr
; 
5982     if (op 
== REDIS_OP_INTER
) { 
5983         /* skip going over all entries if the smallest zset is NULL or empty */ 
5984         if (src
[0].dict 
&& dictSize(src
[0].dict
) > 0) { 
5985             /* precondition: as src[0].dict is non-empty and the zsets are ordered 
5986              * from small to large, all src[i > 0].dict are non-empty too */ 
5987             di 
= dictGetIterator(src
[0].dict
); 
5988             while((de 
= dictNext(di
)) != NULL
) { 
5989                 double *score 
= zmalloc(sizeof(double)), value
; 
5990                 *score 
= src
[0].weight 
* (*(double*)dictGetEntryVal(de
)); 
5992                 for (j 
= 1; j 
< zsetnum
; j
++) { 
5993                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
5995                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
5996                         zunionInterAggregate(score
, value
, aggregate
); 
6002                 /* skip entry when not present in every source dict */ 
6006                     robj 
*o 
= dictGetEntryKey(de
); 
6007                     dictAdd(dstzset
->dict
,o
,score
); 
6008                     incrRefCount(o
); /* added to dictionary */ 
6009                     zslInsert(dstzset
->zsl
,*score
,o
); 
6010                     incrRefCount(o
); /* added to skiplist */ 
6013             dictReleaseIterator(di
); 
6015     } else if (op 
== REDIS_OP_UNION
) { 
6016         for (i 
= 0; i 
< zsetnum
; i
++) { 
6017             if (!src
[i
].dict
) continue; 
6019             di 
= dictGetIterator(src
[i
].dict
); 
6020             while((de 
= dictNext(di
)) != NULL
) { 
6021                 /* skip key when already processed */ 
6022                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
6024                 double *score 
= zmalloc(sizeof(double)), value
; 
6025                 *score 
= src
[i
].weight 
* (*(double*)dictGetEntryVal(de
)); 
6027                 /* because the zsets are sorted by size, it's only possible 
6028                  * for sets at larger indices to hold this entry */ 
6029                 for (j 
= (i
+1); j 
< zsetnum
; j
++) { 
6030                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
6032                         value 
= src
[j
].weight 
* (*(double*)dictGetEntryVal(other
)); 
6033                         zunionInterAggregate(score
, value
, aggregate
); 
6037                 robj 
*o 
= dictGetEntryKey(de
); 
6038                 dictAdd(dstzset
->dict
,o
,score
); 
6039                 incrRefCount(o
); /* added to dictionary */ 
6040                 zslInsert(dstzset
->zsl
,*score
,o
); 
6041                 incrRefCount(o
); /* added to skiplist */ 
6043             dictReleaseIterator(di
); 
6046         /* unknown operator */ 
6047         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
6050     deleteKey(c
->db
,dstkey
); 
6051     if (dstzset
->zsl
->length
) { 
6052         dictAdd(c
->db
->dict
,dstkey
,dstobj
); 
6053         incrRefCount(dstkey
); 
6054         addReplyLongLong(c
, dstzset
->zsl
->length
); 
6057         decrRefCount(dstobj
); 
6058         addReply(c
, shared
.czero
); 
6063 static void zunionstoreCommand(redisClient 
*c
) { 
6064     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
6067 static void zinterstoreCommand(redisClient 
*c
) { 
6068     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
6071 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
6083     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
6084         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
6086     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
6088     } else if (c
->argc 
>= 5) { 
6089         addReply(c
,shared
.syntaxerr
); 
6093     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
6094          || checkType(c
,o
,REDIS_ZSET
)) return; 
6099     /* convert negative indexes */ 
6100     if (start 
< 0) start 
= llen
+start
; 
6101     if (end 
< 0) end 
= llen
+end
; 
6102     if (start 
< 0) start 
= 0; 
6103     if (end 
< 0) end 
= 0; 
6105     /* indexes sanity checks */ 
6106     if (start 
> end 
|| start 
>= llen
) { 
6107         /* Out of range start or start > end result in empty list */ 
6108         addReply(c
,shared
.emptymultibulk
); 
6111     if (end 
>= llen
) end 
= llen
-1; 
6112     rangelen 
= (end
-start
)+1; 
6114     /* check if starting point is trivial, before searching 
6115      * the element in log(N) time */ 
6117         ln 
= start 
== 0 ? zsl
->tail 
: zslGetElementByRank(zsl
, llen
-start
); 
6120             zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1); 
6123     /* Return the result in form of a multi-bulk reply */ 
6124     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
6125         withscores 
? (rangelen
*2) : rangelen
)); 
6126     for (j 
= 0; j 
< rangelen
; j
++) { 
6128         addReplyBulk(c
,ele
); 
6130             addReplyDouble(c
,ln
->score
); 
6131         ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
6135 static void zrangeCommand(redisClient 
*c
) { 
6136     zrangeGenericCommand(c
,0); 
6139 static void zrevrangeCommand(redisClient 
*c
) { 
6140     zrangeGenericCommand(c
,1); 
6143 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
6144  * If justcount is non-zero, just the count is returned. */ 
6145 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
6148     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
6149     int offset 
= 0, limit 
= -1; 
6153     /* Parse the min-max interval. If one of the values is prefixed 
6154      * by the "(" character, it's considered "open". For instance 
6155      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
6156      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
6157     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
6158         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
6161         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
6163     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
6164         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
6167         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
6170     /* Parse "WITHSCORES": note that if the command was called with 
6171      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
6172      * enter the following paths to parse WITHSCORES and LIMIT. */ 
6173     if (c
->argc 
== 5 || c
->argc 
== 8) { 
6174         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
6179     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
6183             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
6188     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
6189         addReply(c
,shared
.syntaxerr
); 
6191     } else if (c
->argc 
== (7 + withscores
)) { 
6192         offset 
= atoi(c
->argv
[5]->ptr
); 
6193         limit 
= atoi(c
->argv
[6]->ptr
); 
6194         if (offset 
< 0) offset 
= 0; 
6197     /* Ok, lookup the key and get the range */ 
6198     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6200         addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6202         if (o
->type 
!= REDIS_ZSET
) { 
6203             addReply(c
,shared
.wrongtypeerr
); 
6205             zset 
*zsetobj 
= o
->ptr
; 
6206             zskiplist 
*zsl 
= zsetobj
->zsl
; 
6208             robj 
*ele
, *lenobj 
= NULL
; 
6209             unsigned long rangelen 
= 0; 
6211             /* Get the first node with the score >= min, or with 
6212              * score > min if 'minex' is true. */ 
6213             ln 
= zslFirstWithScore(zsl
,min
); 
6214             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
6217                 /* No element matching the speciifed interval */ 
6218                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6222             /* We don't know in advance how many matching elements there 
6223              * are in the list, so we push this object that will represent 
6224              * the multi-bulk length in the output buffer, and will "fix" 
6227                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
6229                 decrRefCount(lenobj
); 
6232             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
6235                     ln 
= ln
->forward
[0]; 
6238                 if (limit 
== 0) break; 
6241                     addReplyBulk(c
,ele
); 
6243                         addReplyDouble(c
,ln
->score
); 
6245                 ln 
= ln
->forward
[0]; 
6247                 if (limit 
> 0) limit
--; 
6250                 addReplyLongLong(c
,(long)rangelen
); 
6252                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
6253                      withscores 
? (rangelen
*2) : rangelen
); 
6259 static void zrangebyscoreCommand(redisClient 
*c
) { 
6260     genericZrangebyscoreCommand(c
,0); 
6263 static void zcountCommand(redisClient 
*c
) { 
6264     genericZrangebyscoreCommand(c
,1); 
6267 static void zcardCommand(redisClient 
*c
) { 
6271     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6272         checkType(c
,o
,REDIS_ZSET
)) return; 
6275     addReplyUlong(c
,zs
->zsl
->length
); 
6278 static void zscoreCommand(redisClient 
*c
) { 
6283     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6284         checkType(c
,o
,REDIS_ZSET
)) return; 
6287     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6289         addReply(c
,shared
.nullbulk
); 
6291         double *score 
= dictGetEntryVal(de
); 
6293         addReplyDouble(c
,*score
); 
6297 static void zrankGenericCommand(redisClient 
*c
, int reverse
) { 
6305     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6306         checkType(c
,o
,REDIS_ZSET
)) return; 
6310     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6312         addReply(c
,shared
.nullbulk
); 
6316     score 
= dictGetEntryVal(de
); 
6317     rank 
= zslGetRank(zsl
, *score
, c
->argv
[2]); 
6320             addReplyLongLong(c
, zsl
->length 
- rank
); 
6322             addReplyLongLong(c
, rank
-1); 
6325         addReply(c
,shared
.nullbulk
); 
6329 static void zrankCommand(redisClient 
*c
) { 
6330     zrankGenericCommand(c
, 0); 
6333 static void zrevrankCommand(redisClient 
*c
) { 
6334     zrankGenericCommand(c
, 1); 
6337 /* ========================= Hashes utility functions ======================= */ 
6338 #define REDIS_HASH_KEY 1 
6339 #define REDIS_HASH_VALUE 2 
6341 /* Check the length of a number of objects to see if we need to convert a 
6342  * zipmap to a real hash. Note that we only check string encoded objects 
6343  * as their string length can be queried in constant time. */ 
6344 static void hashTryConversion(robj 
*subject
, robj 
**argv
, int start
, int end
) { 
6346     if (subject
->encoding 
!= REDIS_ENCODING_ZIPMAP
) return; 
6348     for (i 
= start
; i 
<= end
; i
++) { 
6349         if (argv
[i
]->encoding 
== REDIS_ENCODING_RAW 
&& 
6350             sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) 
6352             convertToRealHash(subject
); 
6358 /* Encode given objects in-place when the hash uses a dict. */ 
6359 static void hashTryObjectEncoding(robj 
*subject
, robj 
**o1
, robj 
**o2
) { 
6360     if (subject
->encoding 
== REDIS_ENCODING_HT
) { 
6361         if (o1
) *o1 
= tryObjectEncoding(*o1
); 
6362         if (o2
) *o2 
= tryObjectEncoding(*o2
); 
6366 /* Get the value from a hash identified by key. Returns either a string 
6367  * object or NULL if the value cannot be found. The refcount of the object 
6368  * is always increased by 1 when the value was found. */ 
6369 static robj 
*hashGet(robj 
*o
, robj 
*key
) { 
6371     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6374         key 
= getDecodedObject(key
); 
6375         if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) { 
6376             value 
= createStringObject((char*)v
,vlen
); 
6380         dictEntry 
*de 
= dictFind(o
->ptr
,key
); 
6382             value 
= dictGetEntryVal(de
); 
6383             incrRefCount(value
); 
6389 /* Test if the key exists in the given hash. Returns 1 if the key 
6390  * exists and 0 when it doesn't. */ 
6391 static int hashExists(robj 
*o
, robj 
*key
) { 
6392     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6393         key 
= getDecodedObject(key
); 
6394         if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) { 
6400         if (dictFind(o
->ptr
,key
) != NULL
) { 
6407 /* Add an element, discard the old if the key already exists. 
6408  * Return 0 on insert and 1 on update. */ 
6409 static int hashSet(robj 
*o
, robj 
*key
, robj 
*value
) { 
6411     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6412         key 
= getDecodedObject(key
); 
6413         value 
= getDecodedObject(value
); 
6414         o
->ptr 
= zipmapSet(o
->ptr
, 
6415             key
->ptr
,sdslen(key
->ptr
), 
6416             value
->ptr
,sdslen(value
->ptr
), &update
); 
6418         decrRefCount(value
); 
6420         /* Check if the zipmap needs to be upgraded to a real hash table */ 
6421         if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
) 
6422             convertToRealHash(o
); 
6424         if (dictReplace(o
->ptr
,key
,value
)) { 
6431         incrRefCount(value
); 
6436 /* Delete an element from a hash. 
6437  * Return 1 on deleted and 0 on not found. */ 
6438 static int hashDelete(robj 
*o
, robj 
*key
) { 
6440     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6441         key 
= getDecodedObject(key
); 
6442         o
->ptr 
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
); 
6445         deleted 
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
; 
6446         /* Always check if the dictionary needs a resize after a delete. */ 
6447         if (deleted 
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
); 
6452 /* Return the number of elements in a hash. */ 
6453 static unsigned long hashLength(robj 
*o
) { 
6454     return (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) ? 
6455         zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
); 
6458 /* Structure to hold hash iteration abstraction. Note that iteration over 
6459  * hashes involves both fields and values. Because it is possible that 
6460  * not both are required, store pointers in the iterator to avoid 
6461  * unnecessary memory allocation for fields/values. */ 
6465     unsigned char *zk
, *zv
; 
6466     unsigned int zklen
, zvlen
; 
6472 static hashIterator 
*hashInitIterator(robj 
*subject
) { 
6473     hashIterator 
*hi 
= zmalloc(sizeof(hashIterator
)); 
6474     hi
->encoding 
= subject
->encoding
; 
6475     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6476         hi
->zi 
= zipmapRewind(subject
->ptr
); 
6477     } else if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6478         hi
->di 
= dictGetIterator(subject
->ptr
); 
6485 static void hashReleaseIterator(hashIterator 
*hi
) { 
6486     if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6487         dictReleaseIterator(hi
->di
); 
6492 /* Move to the next entry in the hash. Return REDIS_OK when the next entry 
6493  * could be found and REDIS_ERR when the iterator reaches the end. */ 
6494 static int hashNext(hashIterator 
*hi
) { 
6495     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6496         if ((hi
->zi 
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
, 
6497             &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
; 
6499         if ((hi
->de 
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
; 
6504 /* Get key or value object at current iteration position. 
6505  * This increases the refcount of the field object by 1. */ 
6506 static robj 
*hashCurrent(hashIterator 
*hi
, int what
) { 
6508     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6509         if (what 
& REDIS_HASH_KEY
) { 
6510             o 
= createStringObject((char*)hi
->zk
,hi
->zklen
); 
6512             o 
= createStringObject((char*)hi
->zv
,hi
->zvlen
); 
6515         if (what 
& REDIS_HASH_KEY
) { 
6516             o 
= dictGetEntryKey(hi
->de
); 
6518             o 
= dictGetEntryVal(hi
->de
); 
6525 static robj 
*hashLookupWriteOrCreate(redisClient 
*c
, robj 
*key
) { 
6526     robj 
*o 
= lookupKeyWrite(c
->db
,key
); 
6528         o 
= createHashObject(); 
6529         dictAdd(c
->db
->dict
,key
,o
); 
6532         if (o
->type 
!= REDIS_HASH
) { 
6533             addReply(c
,shared
.wrongtypeerr
); 
6540 /* ============================= Hash commands ============================== */ 
6541 static void hsetCommand(redisClient 
*c
) { 
6545     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6546     hashTryConversion(o
,c
->argv
,2,3); 
6547     hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
6548     update 
= hashSet(o
,c
->argv
[2],c
->argv
[3]); 
6549     addReply(c
, update 
? shared
.czero 
: shared
.cone
); 
6553 static void hsetnxCommand(redisClient 
*c
) { 
6555     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6556     hashTryConversion(o
,c
->argv
,2,3); 
6558     if (hashExists(o
, c
->argv
[2])) { 
6559         addReply(c
, shared
.czero
); 
6561         hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
6562         hashSet(o
,c
->argv
[2],c
->argv
[3]); 
6563         addReply(c
, shared
.cone
); 
6568 static void hmsetCommand(redisClient 
*c
) { 
6572     if ((c
->argc 
% 2) == 1) { 
6573         addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n")); 
6577     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6578     hashTryConversion(o
,c
->argv
,2,c
->argc
-1); 
6579     for (i 
= 2; i 
< c
->argc
; i 
+= 2) { 
6580         hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]); 
6581         hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]); 
6583     addReply(c
, shared
.ok
); 
6587 static void hincrbyCommand(redisClient 
*c
) { 
6588     long long value
, incr
; 
6589     robj 
*o
, *current
, *new; 
6591     if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return; 
6592     if ((o 
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6593     if ((current 
= hashGet(o
,c
->argv
[2])) != NULL
) { 
6594         if (getLongLongFromObjectOrReply(c
,current
,&value
, 
6595             "hash value is not an integer") != REDIS_OK
) { 
6596             decrRefCount(current
); 
6599         decrRefCount(current
); 
6605     new = createStringObjectFromLongLong(value
); 
6606     hashTryObjectEncoding(o
,&c
->argv
[2],NULL
); 
6607     hashSet(o
,c
->argv
[2],new); 
6609     addReplyLongLong(c
,value
); 
6613 static void hgetCommand(redisClient 
*c
) { 
6615     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6616         checkType(c
,o
,REDIS_HASH
)) return; 
6618     if ((value 
= hashGet(o
,c
->argv
[2])) != NULL
) { 
6619         addReplyBulk(c
,value
); 
6620         decrRefCount(value
); 
6622         addReply(c
,shared
.nullbulk
); 
6626 static void hmgetCommand(redisClient 
*c
) { 
6629     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6630     if (o 
!= NULL 
&& o
->type 
!= REDIS_HASH
) { 
6631         addReply(c
,shared
.wrongtypeerr
); 
6634     /* Note the check for o != NULL happens inside the loop. This is 
6635      * done because objects that cannot be found are considered to be 
6636      * an empty hash. The reply should then be a series of NULLs. */ 
6637     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2)); 
6638     for (i 
= 2; i 
< c
->argc
; i
++) { 
6639         if (o 
!= NULL 
&& (value 
= hashGet(o
,c
->argv
[i
])) != NULL
) { 
6640             addReplyBulk(c
,value
); 
6641             decrRefCount(value
); 
6643             addReply(c
,shared
.nullbulk
); 
6648 static void hdelCommand(redisClient 
*c
) { 
6650     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6651         checkType(c
,o
,REDIS_HASH
)) return; 
6653     if (hashDelete(o
,c
->argv
[2])) { 
6654         if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]); 
6655         addReply(c
,shared
.cone
); 
6658         addReply(c
,shared
.czero
); 
6662 static void hlenCommand(redisClient 
*c
) { 
6664     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6665         checkType(c
,o
,REDIS_HASH
)) return; 
6667     addReplyUlong(c
,hashLength(o
)); 
6670 static void genericHgetallCommand(redisClient 
*c
, int flags
) { 
6671     robj 
*o
, *lenobj
, *obj
; 
6672     unsigned long count 
= 0; 
6675     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
6676         || checkType(c
,o
,REDIS_HASH
)) return; 
6678     lenobj 
= createObject(REDIS_STRING
,NULL
); 
6680     decrRefCount(lenobj
); 
6682     hi 
= hashInitIterator(o
); 
6683     while (hashNext(hi
) != REDIS_ERR
) { 
6684         if (flags 
& REDIS_HASH_KEY
) { 
6685             obj 
= hashCurrent(hi
,REDIS_HASH_KEY
); 
6686             addReplyBulk(c
,obj
); 
6690         if (flags 
& REDIS_HASH_VALUE
) { 
6691             obj 
= hashCurrent(hi
,REDIS_HASH_VALUE
); 
6692             addReplyBulk(c
,obj
); 
6697     hashReleaseIterator(hi
); 
6699     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",count
); 
6702 static void hkeysCommand(redisClient 
*c
) { 
6703     genericHgetallCommand(c
,REDIS_HASH_KEY
); 
6706 static void hvalsCommand(redisClient 
*c
) { 
6707     genericHgetallCommand(c
,REDIS_HASH_VALUE
); 
6710 static void hgetallCommand(redisClient 
*c
) { 
6711     genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
); 
6714 static void hexistsCommand(redisClient 
*c
) { 
6716     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6717         checkType(c
,o
,REDIS_HASH
)) return; 
6719     addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone 
: shared
.czero
); 
6722 static void convertToRealHash(robj 
*o
) { 
6723     unsigned char *key
, *val
, *p
, *zm 
= o
->ptr
; 
6724     unsigned int klen
, vlen
; 
6725     dict 
*dict 
= dictCreate(&hashDictType
,NULL
); 
6727     assert(o
->type 
== REDIS_HASH 
&& o
->encoding 
!= REDIS_ENCODING_HT
); 
6728     p 
= zipmapRewind(zm
); 
6729     while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
6730         robj 
*keyobj
, *valobj
; 
6732         keyobj 
= createStringObject((char*)key
,klen
); 
6733         valobj 
= createStringObject((char*)val
,vlen
); 
6734         keyobj 
= tryObjectEncoding(keyobj
); 
6735         valobj 
= tryObjectEncoding(valobj
); 
6736         dictAdd(dict
,keyobj
,valobj
); 
6738     o
->encoding 
= REDIS_ENCODING_HT
; 
6743 /* ========================= Non type-specific commands  ==================== */ 
6745 static void flushdbCommand(redisClient 
*c
) { 
6746     server
.dirty 
+= dictSize(c
->db
->dict
); 
6747     dictEmpty(c
->db
->dict
); 
6748     dictEmpty(c
->db
->expires
); 
6749     addReply(c
,shared
.ok
); 
6752 static void flushallCommand(redisClient 
*c
) { 
6753     server
.dirty 
+= emptyDb(); 
6754     addReply(c
,shared
.ok
); 
6755     if (server
.bgsavechildpid 
!= -1) { 
6756         kill(server
.bgsavechildpid
,SIGKILL
); 
6757         rdbRemoveTempFile(server
.bgsavechildpid
); 
6759     rdbSave(server
.dbfilename
); 
6763 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
6764     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
6766     so
->pattern 
= pattern
; 
6770 /* Return the value associated to the key with a name obtained 
6771  * substituting the first occurence of '*' in 'pattern' with 'subst'. 
6772  * The returned object will always have its refcount increased by 1 
6773  * when it is non-NULL. */ 
6774 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
6777     robj keyobj
, fieldobj
, *o
; 
6778     int prefixlen
, sublen
, postfixlen
, fieldlen
; 
6779     /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
6783         char buf
[REDIS_SORTKEY_MAX
+1]; 
6784     } keyname
, fieldname
; 
6786     /* If the pattern is "#" return the substitution object itself in order 
6787      * to implement the "SORT ... GET #" feature. */ 
6788     spat 
= pattern
->ptr
; 
6789     if (spat
[0] == '#' && spat
[1] == '\0') { 
6790         incrRefCount(subst
); 
6794     /* The substitution object may be specially encoded. If so we create 
6795      * a decoded object on the fly. Otherwise getDecodedObject will just 
6796      * increment the ref count, that we'll decrement later. */ 
6797     subst 
= getDecodedObject(subst
); 
6800     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
6801     p 
= strchr(spat
,'*'); 
6803         decrRefCount(subst
); 
6807     /* Find out if we're dealing with a hash dereference. */ 
6808     if ((f 
= strstr(p
+1, "->")) != NULL
) { 
6809         fieldlen 
= sdslen(spat
)-(f
-spat
); 
6810         /* this also copies \0 character */ 
6811         memcpy(fieldname
.buf
,f
+2,fieldlen
-1); 
6812         fieldname
.len 
= fieldlen
-2; 
6818     sublen 
= sdslen(ssub
); 
6819     postfixlen 
= sdslen(spat
)-(prefixlen
+1)-fieldlen
; 
6820     memcpy(keyname
.buf
,spat
,prefixlen
); 
6821     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
6822     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
6823     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
6824     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
6825     decrRefCount(subst
); 
6827     /* Lookup substituted key */ 
6828     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)); 
6829     o 
= lookupKeyRead(db
,&keyobj
); 
6830     if (o 
== NULL
) return NULL
; 
6833         if (o
->type 
!= REDIS_HASH 
|| fieldname
.len 
< 1) return NULL
; 
6835         /* Retrieve value from hash by the field name. This operation 
6836          * already increases the refcount of the returned object. */ 
6837         initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2)); 
6838         o 
= hashGet(o
, &fieldobj
); 
6840         if (o
->type 
!= REDIS_STRING
) return NULL
; 
6842         /* Every object that this function returns needs to have its refcount 
6843          * increased. sortCommand decreases it again. */ 
6850 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
6851  * the additional parameter is not standard but a BSD-specific we have to 
6852  * pass sorting parameters via the global 'server' structure */ 
6853 static int sortCompare(const void *s1
, const void *s2
) { 
6854     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
6857     if (!server
.sort_alpha
) { 
6858         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
6859         if (so1
->u
.score 
> so2
->u
.score
) { 
6861         } else if (so1
->u
.score 
< so2
->u
.score
) { 
6867         /* Alphanumeric sorting */ 
6868         if (server
.sort_bypattern
) { 
6869             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
6870                 /* At least one compare object is NULL */ 
6871                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
6873                 else if (so1
->u
.cmpobj 
== NULL
) 
6878                 /* We have both the objects, use strcoll */ 
6879                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
6882             /* Compare elements directly. */ 
6883             cmp 
= compareStringObjects(so1
->obj
,so2
->obj
); 
6886     return server
.sort_desc 
? -cmp 
: cmp
; 
/* SORT key [BY pattern] [LIMIT start count] [GET pattern ...]
 *          [ASC|DESC] [ALPHA] [STORE dstkey]
 *
 * The key must hold a list, a set or a sorted set. Without STORE the
 * result is sent to the client as a multi bulk reply; with STORE it is
 * saved as a list at 'storekey' and the number of stored items is
 * replied as an integer. */
6889 /* The SORT command is the most complex command in Redis. Warning: this code 
6890  * is optimized for speed and a bit less for readability */ 
6891 static void sortCommand(redisClient 
*c
) { 
6894     int desc 
= 0, alpha 
= 0; 
6895     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
6896     int j
, dontsort 
= 0, vectorlen
; 
6897     int getop 
= 0; /* GET operation counter */ 
6898     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
6899     redisSortObject 
*vector
; /* Resulting vector to sort */ 
6901     /* Lookup the key to sort. It must be of the right types */ 
6902     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6903     if (sortval 
== NULL
) { 
6904         addReply(c
,shared
.emptymultibulk
); 
6907     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
6908         sortval
->type 
!= REDIS_ZSET
) 
6910         addReply(c
,shared
.wrongtypeerr
); 
6914     /* Create a list of operations to perform for every sorted element. 
6915      * Operations can be GET/DEL/INCR/DECR */ 
6916     operations 
= listCreate(); 
6917     listSetFreeMethod(operations
,zfree
); 
6920     /* Now we need to protect sortval incrementing its count, in the future 
6921      * SORT may have options able to overwrite/delete keys during the sorting 
6922      * and the sorted key itself may get destroyed */ 
6923     incrRefCount(sortval
); 
6925     /* The SORT command has an SQL-alike syntax, parse it */ 
6926     while(j 
< c
->argc
) { 
6927         int leftargs 
= c
->argc
-j
-1; 
6928         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
6930         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
6932         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
6934         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
/* NOTE(review): atoi() reports no conversion errors, so a non numeric
 * LIMIT argument is silently read as 0. */
6935             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
6936             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
6938         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
6939             storekey 
= c
->argv
[j
+1]; 
6941         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
6942             sortby 
= c
->argv
[j
+1]; 
6943             /* If the BY pattern does not contain '*', i.e. it is constant, 
6944              * we don't need to sort nor to lookup the weight keys. */ 
6945             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
6947         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
6948             listAddNodeTail(operations
,createSortOperation( 
6949                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
/* Unrecognized option: release what was acquired so far and reply with
 * a syntax error. */
6953             decrRefCount(sortval
); 
6954             listRelease(operations
); 
6955             addReply(c
,shared
.syntaxerr
); 
6961     /* Load the sorting vector with all the objects to sort */ 
6962     switch(sortval
->type
) { 
6963     case REDIS_LIST
: vectorlen 
= listLength((list
*)sortval
->ptr
); break; 
6964     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
6965     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
6966     default: vectorlen 
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */ 
6968     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
6971     if (sortval
->type 
== REDIS_LIST
) { 
6972         list 
*list 
= sortval
->ptr
; 
6976         listRewind(list
,&li
); 
6977         while((ln 
= listNext(&li
))) { 
6978             robj 
*ele 
= ln
->value
; 
6979             vector
[j
].obj 
= ele
; 
6980             vector
[j
].u
.score 
= 0; 
6981             vector
[j
].u
.cmpobj 
= NULL
; 
6989         if (sortval
->type 
== REDIS_SET
) { 
6992             zset 
*zs 
= sortval
->ptr
; 
6996         di 
= dictGetIterator(set
); 
6997         while((setele 
= dictNext(di
)) != NULL
) { 
6998             vector
[j
].obj 
= dictGetEntryKey(setele
); 
6999             vector
[j
].u
.score 
= 0; 
7000             vector
[j
].u
.cmpobj 
= NULL
; 
7003         dictReleaseIterator(di
); 
7005     redisAssert(j 
== vectorlen
); 
7007     /* Now it's time to load the right scores in the sorting vector */ 
7008     if (dontsort 
== 0) { 
7009         for (j 
= 0; j 
< vectorlen
; j
++) { 
7012                 /* lookup value to sort by */ 
7013                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
7014                 if (!byval
) continue; 
7016                 /* use object itself to sort by */ 
7017                 byval 
= vector
[j
].obj
; 
7021                 if (sortby
) vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
7023                 if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
7024                     vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
7025                 } else if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
7026                     /* Don't need to decode the object if it's 
7027                      * integer-encoded (the only encoding supported) so 
7028                      * far. We can just cast it */ 
7029                     vector
[j
].u
.score 
= (long)byval
->ptr
; 
7031                     redisAssert(1 != 1); 
7035             /* when the object was retrieved using lookupKeyByPattern, 
7036              * its refcount needs to be decreased. */ 
7038                 decrRefCount(byval
); 
7043     /* We are ready to sort the vector... perform a bit of sanity check 
7044      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
7045     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
7046     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
7047     if (start 
>= vectorlen
) { 
7048         start 
= vectorlen
-1; 
7051     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
7053     if (dontsort 
== 0) { 
/* sortCompare() takes its parameters from these server fields. */
7054         server
.sort_desc 
= desc
; 
7055         server
.sort_alpha 
= alpha
; 
7056         server
.sort_bypattern 
= sortby 
? 1 : 0; 
7057         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
7058             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
7060             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
7063     /* Send command output to the output buffer, performing the specified 
7064      * GET/DEL/INCR/DECR operations if any. */ 
7065     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
7066     if (storekey 
== NULL
) { 
7067         /* STORE option not specified, send the sorting result to client */ 
7068         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
7069         for (j 
= start
; j 
<= end
; j
++) { 
7073             if (!getop
) addReplyBulk(c
,vector
[j
].obj
); 
7074             listRewind(operations
,&li
); 
7075             while((ln 
= listNext(&li
))) { 
7076                 redisSortOperation 
*sop 
= ln
->value
; 
7077                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7080                 if (sop
->type 
== REDIS_SORT_GET
) { 
7082                         addReply(c
,shared
.nullbulk
); 
7084                         addReplyBulk(c
,val
); 
7088                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
7093         robj 
*listObject 
= createListObject(); 
7094         list 
*listPtr 
= (list
*) listObject
->ptr
; 
7096         /* STORE option specified, set the sorting result as a List object */ 
7097         for (j 
= start
; j 
<= end
; j
++) { 
7102                 listAddNodeTail(listPtr
,vector
[j
].obj
); 
7103                 incrRefCount(vector
[j
].obj
); 
7105             listRewind(operations
,&li
); 
7106             while((ln 
= listNext(&li
))) { 
7107                 redisSortOperation 
*sop 
= ln
->value
; 
7108                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7111                 if (sop
->type 
== REDIS_SORT_GET
) { 
7113                         listAddNodeTail(listPtr
,createStringObject("",0)); 
7115                         /* We should do a incrRefCount on val because it is 
7116                          * added to the list, but also a decrRefCount because 
7117                          * it is returned by lookupKeyByPattern. This results 
7118                          * in doing nothing at all. */ 
7119                         listAddNodeTail(listPtr
,val
); 
7122                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
7126         if (dictReplace(c
->db
->dict
,storekey
,listObject
)) { 
7127             incrRefCount(storekey
); 
7129         /* Note: we add 1 because the DB is dirty anyway since even if the 
7130          * SORT result is empty a new key is set and maybe the old content 
7132         server
.dirty 
+= 1+outputlen
; 
7133         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
/* Cleanup: drop the reference that protected sortval, the operations
 * list, and the decoded compare objects created for ALPHA sorting. */
7137     decrRefCount(sortval
); 
7138     listRelease(operations
); 
7139     for (j 
= 0; j 
< vectorlen
; j
++) { 
7140         if (alpha 
&& vector
[j
].u
.cmpobj
) 
7141             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must point to a buffer large enough for the result: the longest
 * output for a 64 bit input is about 13 characters plus the terminator.
 * Values below 1024 are printed as an integer number of bytes, larger
 * values with two decimals in K, M, G or T (powers of 1024). Everything
 * from one tebibyte up is expressed in T, so the buffer is always
 * written whatever the value of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else {
        /* Without this branch the buffer was left untouched for
         * n >= 1 TiB and the caller printed uninitialized memory. */
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    }
}
7167 /* Create the string returned by the INFO command. This is decoupled 
7168  * by the INFO command itself as we need to report the same information 
7169  * on memory corruption problems. */ 
/* The result is an sds string of "name:value\r\n" lines built with
 * sdscatprintf(): a general section, a replication section appended only
 * when server.masterhost is set, a VM section appended only when
 * server.vm_enabled is set, and one "dbN:keys=...,expires=..." line for
 * every database that has keys or expires. */
7170 static sds 
genRedisInfoString(void) { 
7172     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
/* hmem receives the human readable form of the used memory. */
7176     bytesToHuman(hmem
,zmalloc_used_memory()); 
7177     info 
= sdscatprintf(sdsempty(), 
7178         "redis_version:%s\r\n" 
7179         "redis_git_sha1:%s\r\n" 
7180         "redis_git_dirty:%d\r\n" 
7182         "multiplexing_api:%s\r\n" 
7183         "process_id:%ld\r\n" 
7184         "uptime_in_seconds:%ld\r\n" 
7185         "uptime_in_days:%ld\r\n" 
7186         "connected_clients:%d\r\n" 
7187         "connected_slaves:%d\r\n" 
7188         "blocked_clients:%d\r\n" 
7189         "used_memory:%zu\r\n" 
7190         "used_memory_human:%s\r\n" 
7191         "changes_since_last_save:%lld\r\n" 
7192         "bgsave_in_progress:%d\r\n" 
7193         "last_save_time:%ld\r\n" 
7194         "bgrewriteaof_in_progress:%d\r\n" 
7195         "total_connections_received:%lld\r\n" 
7196         "total_commands_processed:%lld\r\n" 
7197         "expired_keys:%lld\r\n" 
7198         "hash_max_zipmap_entries:%zu\r\n" 
7199         "hash_max_zipmap_value:%zu\r\n" 
7200         "pubsub_channels:%ld\r\n" 
7201         "pubsub_patterns:%u\r\n" 
/* NOTE(review): the arguments below are passed without casts, so every
 * conversion above has to match the declared type of its argument
 * exactly (e.g. %ld with time_t, %d/%u with listLength(), %ld with
 * dictSize()) -- TODO confirm against the struct and macro definitions. */
7206         strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0, 
7207         (sizeof(long) == 8) ? "64" : "32", 
7212         listLength(server
.clients
)-listLength(server
.slaves
), 
7213         listLength(server
.slaves
), 
7214         server
.blpop_blocked_clients
, 
7215         zmalloc_used_memory(), 
7218         server
.bgsavechildpid 
!= -1, 
7220         server
.bgrewritechildpid 
!= -1, 
7221         server
.stat_numconnections
, 
7222         server
.stat_numcommands
, 
7223         server
.stat_expiredkeys
, 
7224         server
.hash_max_zipmap_entries
, 
7225         server
.hash_max_zipmap_value
, 
7226         dictSize(server
.pubsub_channels
), 
7227         listLength(server
.pubsub_patterns
), 
7228         server
.vm_enabled 
!= 0, 
7229         server
.masterhost 
== NULL 
? "master" : "slave" 
/* Replication details, only reported when a master host is configured. */
7231     if (server
.masterhost
) { 
7232         info 
= sdscatprintf(info
, 
7233             "master_host:%s\r\n" 
7234             "master_port:%d\r\n" 
7235             "master_link_status:%s\r\n" 
7236             "master_last_io_seconds_ago:%d\r\n" 
7239             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
/* -1 is reported when there is no master client object. */
7241             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
/* Virtual memory statistics, only reported when VM is enabled. Every
 * argument is cast to the exact type of its conversion. */
7244     if (server
.vm_enabled
) { 
7246         info 
= sdscatprintf(info
, 
7247             "vm_conf_max_memory:%llu\r\n" 
7248             "vm_conf_page_size:%llu\r\n" 
7249             "vm_conf_pages:%llu\r\n" 
7250             "vm_stats_used_pages:%llu\r\n" 
7251             "vm_stats_swapped_objects:%llu\r\n" 
7252             "vm_stats_swappin_count:%llu\r\n" 
7253             "vm_stats_swappout_count:%llu\r\n" 
7254             "vm_stats_io_newjobs_len:%lu\r\n" 
7255             "vm_stats_io_processing_len:%lu\r\n" 
7256             "vm_stats_io_processed_len:%lu\r\n" 
7257             "vm_stats_io_active_threads:%lu\r\n" 
7258             "vm_stats_blocked_clients:%lu\r\n" 
7259             ,(unsigned long long) server
.vm_max_memory
, 
7260             (unsigned long long) server
.vm_page_size
, 
7261             (unsigned long long) server
.vm_pages
, 
7262             (unsigned long long) server
.vm_stats_used_pages
, 
7263             (unsigned long long) server
.vm_stats_swapped_objects
, 
7264             (unsigned long long) server
.vm_stats_swapins
, 
7265             (unsigned long long) server
.vm_stats_swapouts
, 
7266             (unsigned long) listLength(server
.io_newjobs
), 
7267             (unsigned long) listLength(server
.io_processing
), 
7268             (unsigned long) listLength(server
.io_processed
), 
7269             (unsigned long) server
.io_active_threads
, 
7270             (unsigned long) server
.vm_blocked_clients
 
/* Per database key counts; empty databases are not listed. */
7274     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7275         long long keys
, vkeys
; 
7277         keys 
= dictSize(server
.db
[j
].dict
); 
7278         vkeys 
= dictSize(server
.db
[j
].expires
); 
7279         if (keys 
|| vkeys
) { 
7280             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
7287 static void infoCommand(redisClient 
*c
) { 
7288     sds info 
= genRedisInfoString(); 
7289     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
7290         (unsigned long)sdslen(info
))); 
7291     addReplySds(c
,info
); 
7292     addReply(c
,shared
.crlf
); 
7295 static void monitorCommand(redisClient 
*c
) { 
7296     /* ignore MONITOR if aleady slave or in monitor mode */ 
7297     if (c
->flags 
& REDIS_SLAVE
) return; 
7299     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
7301     listAddNodeTail(server
.monitors
,c
); 
7302     addReply(c
,shared
.ok
); 
7305 /* ================================= Expire ================================= */ 
7306 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
7307     if (dictDelete(db
->expires
,key
) == DICT_OK
) { 
7314 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
7315     if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) { 
7323 /* Return the expire time of the specified key, or -1 if no expire 
7324  * is associated with this key (i.e. the key is non volatile) */ 
7325 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
7328     /* No expire? return ASAP */ 
7329     if (dictSize(db
->expires
) == 0 || 
7330        (de 
= dictFind(db
->expires
,key
)) == NULL
) return -1; 
7332     return (time_t) dictGetEntryVal(de
); 
7335 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
7339     /* No expire? return ASAP */ 
7340     if (dictSize(db
->expires
) == 0 || 
7341        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
7343     /* Lookup the expire */ 
7344     when 
= (time_t) dictGetEntryVal(de
); 
7345     if (time(NULL
) <= when
) return 0; 
7347     /* Delete the key */ 
7348     dictDelete(db
->expires
,key
); 
7349     server
.stat_expiredkeys
++; 
7350     return dictDelete(db
->dict
,key
) == DICT_OK
; 
7353 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
7356     /* No expire? return ASAP */ 
7357     if (dictSize(db
->expires
) == 0 || 
7358        (de 
= dictFind(db
->expires
,key
)) == NULL
) return 0; 
7360     /* Delete the key */ 
7362     server
.stat_expiredkeys
++; 
7363     dictDelete(db
->expires
,key
); 
7364     return dictDelete(db
->dict
,key
) == DICT_OK
; 
7367 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*param
, long offset
) { 
7371     if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return; 
7375     de 
= dictFind(c
->db
->dict
,key
); 
7377         addReply(c
,shared
.czero
); 
7381         if (deleteKey(c
->db
,key
)) server
.dirty
++; 
7382         addReply(c
, shared
.cone
); 
7385         time_t when 
= time(NULL
)+seconds
; 
7386         if (setExpire(c
->db
,key
,when
)) { 
7387             addReply(c
,shared
.cone
); 
7390             addReply(c
,shared
.czero
); 
7396 static void expireCommand(redisClient 
*c
) { 
7397     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0); 
7400 static void expireatCommand(redisClient 
*c
) { 
7401     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
)); 
7404 static void ttlCommand(redisClient 
*c
) { 
7408     expire 
= getExpire(c
->db
,c
->argv
[1]); 
7410         ttl 
= (int) (expire
-time(NULL
)); 
7411         if (ttl 
< 0) ttl 
= -1; 
7413     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
7416 /* ================================ MULTI/EXEC ============================== */ 
7418 /* Client state initialization for MULTI/EXEC */ 
7419 static void initClientMultiState(redisClient 
*c
) { 
7420     c
->mstate
.commands 
= NULL
; 
7421     c
->mstate
.count 
= 0; 
7424 /* Release all the resources associated with MULTI/EXEC state */ 
7425 static void freeClientMultiState(redisClient 
*c
) { 
7428     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7430         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
7432         for (i 
= 0; i 
< mc
->argc
; i
++) 
7433             decrRefCount(mc
->argv
[i
]); 
7436     zfree(c
->mstate
.commands
); 
7439 /* Add a new command into the MULTI commands queue */ 
7440 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
7444     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
7445             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
7446     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
7449     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
7450     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
7451     for (j 
= 0; j 
< c
->argc
; j
++) 
7452         incrRefCount(mc
->argv
[j
]); 
7456 static void multiCommand(redisClient 
*c
) { 
7457     c
->flags 
|= REDIS_MULTI
; 
7458     addReply(c
,shared
.ok
); 
7461 static void discardCommand(redisClient 
*c
) { 
7462     if (!(c
->flags 
& REDIS_MULTI
)) { 
7463         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
7467     freeClientMultiState(c
); 
7468     initClientMultiState(c
); 
7469     c
->flags 
&= (~REDIS_MULTI
); 
7470     addReply(c
,shared
.ok
); 
7473 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand 
7474  * implememntation for more information. */ 
7475 static void execCommandReplicateMulti(redisClient 
*c
) { 
7476     struct redisCommand 
*cmd
; 
7477     robj 
*multistring 
= createStringObject("MULTI",5); 
7479     cmd 
= lookupCommand("multi"); 
7480     if (server
.appendonly
) 
7481         feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1); 
7482     if (listLength(server
.slaves
)) 
7483         replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1); 
7484     decrRefCount(multistring
); 
7487 static void execCommand(redisClient 
*c
) { 
7492     if (!(c
->flags 
& REDIS_MULTI
)) { 
7493         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
7497     /* Replicate a MULTI request now that we are sure the block is executed. 
7498      * This way we'll deliver the MULTI/..../EXEC block as a whole and 
7499      * both the AOF and the replication link will have the same consistency 
7500      * and atomicity guarantees. */ 
7501     execCommandReplicateMulti(c
); 
7503     /* Exec all the queued commands */ 
7504     orig_argv 
= c
->argv
; 
7505     orig_argc 
= c
->argc
; 
7506     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
7507     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7508         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
7509         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
7510         call(c
,c
->mstate
.commands
[j
].cmd
); 
7512     c
->argv 
= orig_argv
; 
7513     c
->argc 
= orig_argc
; 
7514     freeClientMultiState(c
); 
7515     initClientMultiState(c
); 
7516     c
->flags 
&= (~REDIS_MULTI
); 
7517     /* Make sure the EXEC command is always replicated / AOF, since we 
7518      * always send the MULTI command (we can't know beforehand if the 
7519      * next operations will contain at least a modification to the DB). */ 
7523 /* =========================== Blocking Operations  ========================= */ 
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already
 * be used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are
 * implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to
 * understand the implementation and modify / fix it later.
 */
7554 /* Set a client in blocking mode for the specified key, with the specified 
7556 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
7561     c
->blockingkeys 
= zmalloc(sizeof(robj
*)*numkeys
); 
7562     c
->blockingkeysnum 
= numkeys
; 
7563     c
->blockingto 
= timeout
; 
7564     for (j 
= 0; j 
< numkeys
; j
++) { 
7565         /* Add the key in the client structure, to map clients -> keys */ 
7566         c
->blockingkeys
[j
] = keys
[j
]; 
7567         incrRefCount(keys
[j
]); 
7569         /* And in the other "side", to map keys -> clients */ 
7570         de 
= dictFind(c
->db
->blockingkeys
,keys
[j
]); 
7574             /* For every key we take a list of clients blocked for it */ 
7576             retval 
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
); 
7577             incrRefCount(keys
[j
]); 
7578             assert(retval 
== DICT_OK
); 
7580             l 
= dictGetEntryVal(de
); 
7582         listAddNodeTail(l
,c
); 
7584     /* Mark the client as a blocked client */ 
7585     c
->flags 
|= REDIS_BLOCKED
; 
7586     server
.blpop_blocked_clients
++; 
7589 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
7590 static void unblockClientWaitingData(redisClient 
*c
) { 
7595     assert(c
->blockingkeys 
!= NULL
); 
7596     /* The client may wait for multiple keys, so unblock it for every key. */ 
7597     for (j 
= 0; j 
< c
->blockingkeysnum
; j
++) { 
7598         /* Remove this client from the list of clients waiting for this key. */ 
7599         de 
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
7601         l 
= dictGetEntryVal(de
); 
7602         listDelNode(l
,listSearchKey(l
,c
)); 
7603         /* If the list is empty we need to remove it to avoid wasting memory */ 
7604         if (listLength(l
) == 0) 
7605             dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]); 
7606         decrRefCount(c
->blockingkeys
[j
]); 
7608     /* Cleanup the client structure */ 
7609     zfree(c
->blockingkeys
); 
7610     c
->blockingkeys 
= NULL
; 
7611     c
->flags 
&= (~REDIS_BLOCKED
); 
7612     server
.blpop_blocked_clients
--; 
7613     /* We want to process data if there is some command waiting 
7614      * in the input buffer. Note that this is safe even if 
7615      * unblockClientWaitingData() gets called from freeClient() because 
7616      * freeClient() will be smart enough to call this function 
7617      * *after* c->querybuf was set to NULL. */ 
7618     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
7621 /* This should be called from any function PUSHing into lists. 
7622  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
7623  * 'ele' is the element pushed. 
7625  * If the function returns 0 there was no client waiting for a list push 
7628  * If the function returns 1 there was a client waiting for a list push 
7629  * against this key, the element was passed to this client thus it's not 
7630  * needed to actually add it to the list and the caller should return asap. */ 
7631 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
7632     struct dictEntry 
*de
; 
7633     redisClient 
*receiver
; 
7637     de 
= dictFind(c
->db
->blockingkeys
,key
); 
7638     if (de 
== NULL
) return 0; 
7639     l 
= dictGetEntryVal(de
); 
7642     receiver 
= ln
->value
; 
7644     addReplySds(receiver
,sdsnew("*2\r\n")); 
7645     addReplyBulk(receiver
,key
); 
7646     addReplyBulk(receiver
,ele
); 
7647     unblockClientWaitingData(receiver
); 
7651 /* Blocking RPOP/LPOP */ 
7652 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
7657     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
7658         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
7660             if (o
->type 
!= REDIS_LIST
) { 
7661                 addReply(c
,shared
.wrongtypeerr
); 
7664                 list 
*list 
= o
->ptr
; 
7665                 if (listLength(list
) != 0) { 
7666                     /* If the list contains elements fall back to the usual 
7667                      * non-blocking POP operation */ 
7668                     robj 
*argv
[2], **orig_argv
; 
7671                     /* We need to alter the command arguments before to call 
7672                      * popGenericCommand() as the command takes a single key. */ 
7673                     orig_argv 
= c
->argv
; 
7674                     orig_argc 
= c
->argc
; 
7675                     argv
[1] = c
->argv
[j
]; 
7679                     /* Also the return value is different, we need to output 
7680                      * the multi bulk reply header and the key name. The 
7681                      * "real" command will add the last element (the value) 
7682                      * for us. If this souds like an hack to you it's just 
7683                      * because it is... */ 
7684                     addReplySds(c
,sdsnew("*2\r\n")); 
7685                     addReplyBulk(c
,argv
[1]); 
7686                     popGenericCommand(c
,where
); 
7688                     /* Fix the client structure with the original stuff */ 
7689                     c
->argv 
= orig_argv
; 
7690                     c
->argc 
= orig_argc
; 
7696     /* If the list is empty or the key does not exists we must block */ 
7697     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
7698     if (timeout 
> 0) timeout 
+= time(NULL
); 
7699     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
7702 static void blpopCommand(redisClient 
*c
) { 
7703     blockingPopGenericCommand(c
,REDIS_HEAD
); 
7706 static void brpopCommand(redisClient 
*c
) { 
7707     blockingPopGenericCommand(c
,REDIS_TAIL
); 
7710 /* =============================== Replication  ============================= */ 
7712 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7713     ssize_t nwritten
, ret 
= size
; 
7714     time_t start 
= time(NULL
); 
7718         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
7719             nwritten 
= write(fd
,ptr
,size
); 
7720             if (nwritten 
== -1) return -1; 
7724         if ((time(NULL
)-start
) > timeout
) { 
7732 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
7733     ssize_t nread
, totread 
= 0; 
7734     time_t start 
= time(NULL
); 
7738         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
7739             nread 
= read(fd
,ptr
,size
); 
7740             if (nread 
== -1) return -1; 
7745         if ((time(NULL
)-start
) > timeout
) { 
/* Read a line from 'fd' one byte at a time into 'ptr', a buffer of 'size'
 * bytes. The line terminator ("\n", with an optional "\r" before it) is
 * not stored and the buffer is always left nul terminated.
 *
 * Returns the number of characters stored, or -1 if syncRead() fails
 * (I/O error or timeout). When the line does not fit the buffer, reading
 * stops after size-1 characters and that count is returned. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve one byte for the nul term. */
    while (size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            /* Strip the carriage return of a CRLF terminated line. */
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        /* Account for the byte just stored, so that an overlong line can
         * never be written past the end of the caller's buffer. */
        size--;
    }
    return nread;
}
7774 static void syncCommand(redisClient 
*c
) { 
7775     /* ignore SYNC if aleady slave or in monitor mode */ 
7776     if (c
->flags 
& REDIS_SLAVE
) return; 
7778     /* SYNC can't be issued when the server has pending data to send to 
7779      * the client about already issued commands. We need a fresh reply 
7780      * buffer registering the differences between the BGSAVE and the current 
7781      * dataset, so that we can copy to other slaves if needed. */ 
7782     if (listLength(c
->reply
) != 0) { 
7783         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
7787     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
7788     /* Here we need to check if there is a background saving operation 
7789      * in progress, or if it is required to start one */ 
7790     if (server
.bgsavechildpid 
!= -1) { 
7791         /* Ok a background save is in progress. Let's check if it is a good 
7792          * one for replication, i.e. if there is another slave that is 
7793          * registering differences since the server forked to save */ 
7798         listRewind(server
.slaves
,&li
); 
7799         while((ln 
= listNext(&li
))) { 
7801             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
7804             /* Perfect, the server is already registering differences for 
7805              * another slave. Set the right state, and copy the buffer. */ 
7806             listRelease(c
->reply
); 
7807             c
->reply 
= listDup(slave
->reply
); 
7808             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7809             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
7811             /* No way, we need to wait for the next BGSAVE in order to 
7812              * register differences */ 
7813             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
7814             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
7817         /* Ok we don't have a BGSAVE in progress, let's start one */ 
7818         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
7819         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7820             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
7821             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
7824         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7827     c
->flags 
|= REDIS_SLAVE
; 
7829     listAddNodeTail(server
.slaves
,c
); 
7833 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
7834     redisClient 
*slave 
= privdata
; 
7836     REDIS_NOTUSED(mask
); 
7837     char buf
[REDIS_IOBUF_LEN
]; 
7838     ssize_t nwritten
, buflen
; 
7840     if (slave
->repldboff 
== 0) { 
7841         /* Write the bulk write count before to transfer the DB. In theory here 
7842          * we don't know how much room there is in the output buffer of the 
7843          * socket, but in pratice SO_SNDLOWAT (the minimum count for output 
7844          * operations) will never be smaller than the few bytes we need. */ 
7847         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
7849         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
7857     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
7858     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
7860         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
7861             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
7865     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
7866         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
7871     slave
->repldboff 
+= nwritten
; 
7872     if (slave
->repldboff 
== slave
->repldbsize
) { 
7873         close(slave
->repldbfd
); 
7874         slave
->repldbfd 
= -1; 
7875         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7876         slave
->replstate 
= REDIS_REPL_ONLINE
; 
7877         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
7878             sendReplyToClient
, slave
) == AE_ERR
) { 
7882         addReplySds(slave
,sdsempty()); 
7883         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
7887 /* This function is called at the end of every backgrond saving. 
7888  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
7889  * otherwise REDIS_ERR is passed to the function. 
7891  * The goal of this function is to handle slaves waiting for a successful 
7892  * background saving in order to perform non-blocking synchronization. */ 
7893 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
7895     int startbgsave 
= 0; 
7898     listRewind(server
.slaves
,&li
); 
7899     while((ln 
= listNext(&li
))) { 
7900         redisClient 
*slave 
= ln
->value
; 
7902         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
7904             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
7905         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
7906             struct redis_stat buf
; 
7908             if (bgsaveerr 
!= REDIS_OK
) { 
7910                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
7913             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
7914                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
7916                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
7919             slave
->repldboff 
= 0; 
7920             slave
->repldbsize 
= buf
.st_size
; 
7921             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
7922             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
7923             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
7930         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
7933             listRewind(server
.slaves
,&li
); 
7934             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
7935             while((ln 
= listNext(&li
))) { 
7936                 redisClient 
*slave 
= ln
->value
; 
7938                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
7945 static int syncWithMaster(void) { 
7946     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
7948     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
7949     int dfd
, maxtries 
= 5; 
7952         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
7957     /* AUTH with the master if required. */ 
7958     if(server
.masterauth
) { 
7959         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
7960         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
7962             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
7966         /* Read the AUTH result.  */ 
7967         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7969             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
7973         if (buf
[0] != '+') { 
7975             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
7980     /* Issue the SYNC command */ 
7981     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
7983         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
7987     /* Read the bulk write count */ 
7988     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
7990         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
7994     if (buf
[0] != '$') { 
7996         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
7999     dumpsize 
= strtol(buf
+1,NULL
,10); 
8000     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
8001     /* Read the bulk write data on a temp file */ 
8003         snprintf(tmpfile
,256, 
8004             "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid()); 
8005         dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644); 
8006         if (dfd 
!= -1) break; 
8011         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
8015         int nread
, nwritten
; 
8017         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
8019             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
8025         nwritten 
= write(dfd
,buf
,nread
); 
8026         if (nwritten 
== -1) { 
8027             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
8035     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
8036         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
8042     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
8043         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
8047     server
.master 
= createClient(fd
); 
8048     server
.master
->flags 
|= REDIS_MASTER
; 
8049     server
.master
->authenticated 
= 1; 
8050     server
.replstate 
= REDIS_REPL_CONNECTED
; 
8054 static void slaveofCommand(redisClient 
*c
) { 
8055     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
8056         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
8057         if (server
.masterhost
) { 
8058             sdsfree(server
.masterhost
); 
8059             server
.masterhost 
= NULL
; 
8060             if (server
.master
) freeClient(server
.master
); 
8061             server
.replstate 
= REDIS_REPL_NONE
; 
8062             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
8065         sdsfree(server
.masterhost
); 
8066         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
8067         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
8068         if (server
.master
) freeClient(server
.master
); 
8069         server
.replstate 
= REDIS_REPL_CONNECT
; 
8070         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
8071             server
.masterhost
, server
.masterport
); 
8073     addReply(c
,shared
.ok
); 
8076 /* ============================ Maxmemory directive  ======================== */ 
8078 /* Try to free one object form the pre-allocated objects free list. 
8079  * This is useful under low mem conditions as by default we take 1 million 
8080  * free objects allocated. On success REDIS_OK is returned, otherwise 
8082 static int tryFreeOneObjectFromFreelist(void) { 
8085     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
8086     if (listLength(server
.objfreelist
)) { 
8087         listNode 
*head 
= listFirst(server
.objfreelist
); 
8088         o 
= listNodeValue(head
); 
8089         listDelNode(server
.objfreelist
,head
); 
8090         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8094         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8099 /* This function gets called when 'maxmemory' is set on the config file to limit 
8100  * the max memory used by the server, and we are out of memory. 
8101  * This function will try to, in order: 
8103  * - Free objects from the free list 
8104  * - Try to remove keys with an EXPIRE set 
8106  * It is not possible to free enough memory to reach used-memory < maxmemory 
8107  * the server will start refusing commands that will enlarge even more the 
8110 static void freeMemoryIfNeeded(void) { 
8111     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
8112         int j
, k
, freed 
= 0; 
8114         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
8115         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8117             robj 
*minkey 
= NULL
; 
8118             struct dictEntry 
*de
; 
8120             if (dictSize(server
.db
[j
].expires
)) { 
8122                 /* From a sample of three keys drop the one nearest to 
8123                  * the natural expire */ 
8124                 for (k 
= 0; k 
< 3; k
++) { 
8127                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
8128                     t 
= (time_t) dictGetEntryVal(de
); 
8129                     if (minttl 
== -1 || t 
< minttl
) { 
8130                         minkey 
= dictGetEntryKey(de
); 
8134                 deleteKey(server
.db
+j
,minkey
); 
8137         if (!freed
) return; /* nothing to free... */ 
8141 /* ============================== Append Only file ========================== */ 
8143 /* Write the append only file buffer on disk. 
8145  * Since we are required to write the AOF before replying to the client, 
8146  * and the only way the client socket can get a write is entering when the 
8147  * the event loop, we accumulate all the AOF writes in a memory 
8148  * buffer and write it on disk using this function just before entering 
8149  * the event loop again. */ 
8150 static void flushAppendOnlyFile(void) { 
8154     if (sdslen(server
.aofbuf
) == 0) return; 
8156     /* We want to perform a single write. This should be guaranteed atomic 
8157      * at least if the filesystem we are writing is a real physical one. 
8158      * While this will save us against the server being killed I don't think 
8159      * there is much to do about the whole server stopping for power problems 
8161      nwritten 
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
)); 
8162      if (nwritten 
!= (signed)sdslen(server
.aofbuf
)) { 
8163         /* Ooops, we are in troubles. The best thing to do for now is 
8164          * aborting instead of giving the illusion that everything is 
8165          * working as expected. */ 
8166          if (nwritten 
== -1) { 
8167             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
8169             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
8173     sdsfree(server
.aofbuf
); 
8174     server
.aofbuf 
= sdsempty(); 
8176     /* Fsync if needed */ 
8178     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
8179         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
8180          now
-server
.lastfsync 
> 1)) 
8182         /* aof_fsync is defined as fdatasync() for Linux in order to avoid 
8183          * flushing metadata. */ 
8184         aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
8185         server
.lastfsync 
= now
; 
8189 static sds 
catAppendOnlyGenericCommand(sds buf
, int argc
, robj 
**argv
) { 
8191     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
8192     for (j 
= 0; j 
< argc
; j
++) { 
8193         robj 
*o 
= getDecodedObject(argv
[j
]); 
8194         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
8195         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
8196         buf 
= sdscatlen(buf
,"\r\n",2); 
8202 static sds 
catAppendOnlyExpireAtCommand(sds buf
, robj 
*key
, robj 
*seconds
) { 
8207     /* Make sure we can use strtol */ 
8208     seconds 
= getDecodedObject(seconds
); 
8209     when 
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10); 
8210     decrRefCount(seconds
); 
8212     argv
[0] = createStringObject("EXPIREAT",8); 
8214     argv
[2] = createObject(REDIS_STRING
, 
8215         sdscatprintf(sdsempty(),"%ld",when
)); 
8216     buf 
= catAppendOnlyGenericCommand(buf
, argc
, argv
); 
8217     decrRefCount(argv
[0]); 
8218     decrRefCount(argv
[2]); 
8222 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
8223     sds buf 
= sdsempty(); 
8226     /* The DB this command was targetting is not the same as the last command 
8227      * we appendend. To issue a SELECT command is needed. */ 
8228     if (dictid 
!= server
.appendseldb
) { 
8231         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
8232         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
8233             (unsigned long)strlen(seldb
),seldb
); 
8234         server
.appendseldb 
= dictid
; 
8237     if (cmd
->proc 
== expireCommand
) { 
8238         /* Translate EXPIRE into EXPIREAT */ 
8239         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8240     } else if (cmd
->proc 
== setexCommand
) { 
8241         /* Translate SETEX to SET and EXPIREAT */ 
8242         tmpargv
[0] = createStringObject("SET",3); 
8243         tmpargv
[1] = argv
[1]; 
8244         tmpargv
[2] = argv
[3]; 
8245         buf 
= catAppendOnlyGenericCommand(buf
,3,tmpargv
); 
8246         decrRefCount(tmpargv
[0]); 
8247         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8249         buf 
= catAppendOnlyGenericCommand(buf
,argc
,argv
); 
8252     /* Append to the AOF buffer. This will be flushed on disk just before 
8253      * of re-entering the event loop, so before the client will get a 
8254      * positive reply about the operation performed. */ 
8255     server
.aofbuf 
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
)); 
8257     /* If a background append only file rewriting is in progress we want to 
8258      * accumulate the differences between the child DB and the current one 
8259      * in a buffer, so that when the child process will do its work we 
8260      * can append the differences to the new append only file. */ 
8261     if (server
.bgrewritechildpid 
!= -1) 
8262         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
8267 /* In Redis commands are always executed in the context of a client, so in 
8268  * order to load the append only file we need to create a fake client. */ 
8269 static struct redisClient 
*createFakeClient(void) { 
8270     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
8274     c
->querybuf 
= sdsempty(); 
8278     /* We set the fake client as a slave waiting for the synchronization 
8279      * so that Redis will not try to send replies to this client. */ 
8280     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8281     c
->reply 
= listCreate(); 
8282     listSetFreeMethod(c
->reply
,decrRefCount
); 
8283     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
8284     initClientMultiState(c
); 
8288 static void freeFakeClient(struct redisClient 
*c
) { 
8289     sdsfree(c
->querybuf
); 
8290     listRelease(c
->reply
); 
8291     freeClientMultiState(c
); 
8295 /* Replay the append log file. On error REDIS_OK is returned. On non fatal 
8296  * error (the append only file is zero-length) REDIS_ERR is returned. On 
8297  * fatal error an error message is logged and the program exists. */ 
8298 int loadAppendOnlyFile(char *filename
) { 
8299     struct redisClient 
*fakeClient
; 
8300     FILE *fp 
= fopen(filename
,"r"); 
8301     struct redis_stat sb
; 
8302     unsigned long long loadedkeys 
= 0; 
8303     int appendonly 
= server
.appendonly
; 
8305     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
8309         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
8313     /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI 
8314      * to the same file we're about to read. */ 
8315     server
.appendonly 
= 0; 
8317     fakeClient 
= createFakeClient(); 
8324         struct redisCommand 
*cmd
; 
8326         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
8332         if (buf
[0] != '*') goto fmterr
; 
8334         argv 
= zmalloc(sizeof(robj
*)*argc
); 
8335         for (j 
= 0; j 
< argc
; j
++) { 
8336             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
8337             if (buf
[0] != '$') goto fmterr
; 
8338             len 
= strtol(buf
+1,NULL
,10); 
8339             argsds 
= sdsnewlen(NULL
,len
); 
8340             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
8341             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
8342             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
8345         /* Command lookup */ 
8346         cmd 
= lookupCommand(argv
[0]->ptr
); 
8348             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
8351         /* Try object encoding */ 
8352         if (cmd
->flags 
& REDIS_CMD_BULK
) 
8353             argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]); 
8354         /* Run the command in the context of a fake client */ 
8355         fakeClient
->argc 
= argc
; 
8356         fakeClient
->argv 
= argv
; 
8357         cmd
->proc(fakeClient
); 
8358         /* Discard the reply objects list from the fake client */ 
8359         while(listLength(fakeClient
->reply
)) 
8360             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
8361         /* Clean up, ready for the next command */ 
8362         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
8364         /* Handle swapping while loading big datasets when VM is on */ 
8366         if (server
.vm_enabled 
&& (loadedkeys 
% 5000) == 0) { 
8367             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
8368                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
8373     /* This point can only be reached when EOF is reached without errors. 
8374      * If the client is in the middle of a MULTI/EXEC, log error and quit. */ 
8375     if (fakeClient
->flags 
& REDIS_MULTI
) goto readerr
; 
8378     freeFakeClient(fakeClient
); 
8379     server
.appendonly 
= appendonly
; 
8384         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
8386         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
8390     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
8394 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */ 
8395 static int fwriteBulkObject(FILE *fp
, robj 
*obj
) { 
8399     /* Avoid the incr/decr ref count business if possible to help 
8400      * copy-on-write (we are often in a child process when this function 
8402      * Also makes sure that key objects don't get incrRefCount-ed when VM 
8404     if (obj
->encoding 
!= REDIS_ENCODING_RAW
) { 
8405         obj 
= getDecodedObject(obj
); 
8408     snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
)); 
8409     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
; 
8410     if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0) 
8412     if (fwrite("\r\n",2,1,fp
) == 0) goto err
; 
8413     if (decrrc
) decrRefCount(obj
); 
8416     if (decrrc
) decrRefCount(obj
); 
/* Write a binary-safe string of 'len' bytes into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: print it with the matching conversion. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() of zero bytes returns 0, so skip the empty payload. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The value is printed with the "%.17g" format.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload is formatted together with its trailing CRLF, that does
     * not count for the length announced in the header. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t paylen;

    /* The payload is formatted together with its trailing CRLF, that does
     * not count for the length announced in the header. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
8454 /* Write a sequence of commands able to fully rebuild the dataset into 
8455  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
8456 static int rewriteAppendOnlyFile(char *filename
) { 
8457     dictIterator 
*di 
= NULL
; 
8462     time_t now 
= time(NULL
); 
8464     /* Note that we have to use a different temp name here compared to the 
8465      * one used by rewriteAppendOnlyFileBackground() function. */ 
8466     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
8467     fp 
= fopen(tmpfile
,"w"); 
8469         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
8472     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8473         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
8474         redisDb 
*db 
= server
.db
+j
; 
8476         if (dictSize(d
) == 0) continue; 
8477         di 
= dictGetIterator(d
); 
8483         /* SELECT the new DB */ 
8484         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
8485         if (fwriteBulkLong(fp
,j
) == 0) goto werr
; 
8487         /* Iterate this DB writing every entry */ 
8488         while((de 
= dictNext(di
)) != NULL
) { 
8493             key 
= dictGetEntryKey(de
); 
8494             /* If the value for this key is swapped, load a preview in memory. 
8495              * We use a "swapped" flag to remember if we need to free the 
8496              * value object instead to just increment the ref count anyway 
8497              * in order to avoid copy-on-write of pages if we are forked() */ 
8498             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
8499                 key
->storage 
== REDIS_VM_SWAPPING
) { 
8500                 o 
= dictGetEntryVal(de
); 
8503                 o 
= vmPreviewObject(key
); 
8506             expiretime 
= getExpire(db
,key
); 
8508             /* Save the key and associated value */ 
8509             if (o
->type 
== REDIS_STRING
) { 
8510                 /* Emit a SET command */ 
8511                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
8512                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8514                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8515                 if (fwriteBulkObject(fp
,o
) == 0) goto werr
; 
8516             } else if (o
->type 
== REDIS_LIST
) { 
8517                 /* Emit the RPUSHes needed to rebuild the list */ 
8518                 list 
*list 
= o
->ptr
; 
8522                 listRewind(list
,&li
); 
8523                 while((ln 
= listNext(&li
))) { 
8524                     char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
8525                     robj 
*eleobj 
= listNodeValue(ln
); 
8527                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8528                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8529                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
8531             } else if (o
->type 
== REDIS_SET
) { 
8532                 /* Emit the SADDs needed to rebuild the set */ 
8534                 dictIterator 
*di 
= dictGetIterator(set
); 
8537                 while((de 
= dictNext(di
)) != NULL
) { 
8538                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
8539                     robj 
*eleobj 
= dictGetEntryKey(de
); 
8541                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8542                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8543                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
8545                 dictReleaseIterator(di
); 
8546             } else if (o
->type 
== REDIS_ZSET
) { 
8547                 /* Emit the ZADDs needed to rebuild the sorted set */ 
8549                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
8552                 while((de 
= dictNext(di
)) != NULL
) { 
8553                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
8554                     robj 
*eleobj 
= dictGetEntryKey(de
); 
8555                     double *score 
= dictGetEntryVal(de
); 
8557                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8558                     if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8559                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
8560                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
8562                 dictReleaseIterator(di
); 
8563             } else if (o
->type 
== REDIS_HASH
) { 
8564                 char cmd
[]="*4\r\n$4\r\nHSET\r\n"; 
8566                 /* Emit the HSETs needed to rebuild the hash */ 
8567                 if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
8568                     unsigned char *p 
= zipmapRewind(o
->ptr
); 
8569                     unsigned char *field
, *val
; 
8570                     unsigned int flen
, vlen
; 
8572                     while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
8573                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8574                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8575                         if (fwriteBulkString(fp
,(char*)field
,flen
) == -1) 
8577                         if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1) 
8581                     dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
8584                     while((de 
= dictNext(di
)) != NULL
) { 
8585                         robj 
*field 
= dictGetEntryKey(de
); 
8586                         robj 
*val 
= dictGetEntryVal(de
); 
8588                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8589                         if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8590                         if (fwriteBulkObject(fp
,field
) == -1) return -1; 
8591                         if (fwriteBulkObject(fp
,val
) == -1) return -1; 
8593                     dictReleaseIterator(di
); 
8596                 redisPanic("Unknown object type"); 
8598             /* Save the expire time */ 
8599             if (expiretime 
!= -1) { 
8600                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
8601                 /* If this key is already expired skip it */ 
8602                 if (expiretime 
< now
) continue; 
8603                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
8604                 if (fwriteBulkObject(fp
,key
) == 0) goto werr
; 
8605                 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
; 
8607             if (swapped
) decrRefCount(o
); 
8609         dictReleaseIterator(di
); 
8612     /* Make sure data will not remain on the OS's output buffers */ 
8617     /* Use RENAME to make sure the DB file is changed atomically only 
8618      * if the generated DB file is ok. */ 
8619     if (rename(tmpfile
,filename
) == -1) { 
8620         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
8624     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
8630     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
8631     if (di
) dictReleaseIterator(di
); 
8635 /* This is how rewriting of the append only file in background works: 
8637  * 1) The user calls BGREWRITEAOF 
8638  * 2) Redis calls this function, that forks(): 
8639  *    2a) the child rewrite the append only file in a temp file. 
8640  *    2b) the parent accumulates differences in server.bgrewritebuf. 
8641  * 3) When the child has finished '2a' it exits. 
8642  * 4) The parent will trap the exit code, if it's OK, will append the 
8643  *    data accumulated into server.bgrewritebuf into the temp file, and 
8644  *    finally will rename(2) the temp file in the actual file name. 
8645  *    Then the new file is reopened as the new append only file. Profit! 
8647 static int rewriteAppendOnlyFileBackground(void) { 
8650     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
8651     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
8652     if ((childpid 
= fork()) == 0) { 
8656         if (server
.vm_enabled
) vmReopenSwapFile(); 
8658         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
8659         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
8666         if (childpid 
== -1) { 
8667             redisLog(REDIS_WARNING
, 
8668                 "Can't rewrite append only file in background: fork: %s", 
8672         redisLog(REDIS_NOTICE
, 
8673             "Background append only file rewriting started by pid %d",childpid
); 
8674         server
.bgrewritechildpid 
= childpid
; 
8675         updateDictResizePolicy(); 
8676         /* We set appendseldb to -1 in order to force the next call to the 
8677          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
8678          * accumulated by the parent into server.bgrewritebuf will start 
8679          * with a SELECT statement and it will be safe to merge. */ 
8680         server
.appendseldb 
= -1; 
8683     return REDIS_OK
; /* unreached */ 
8686 static void bgrewriteaofCommand(redisClient 
*c
) { 
8687     if (server
.bgrewritechildpid 
!= -1) { 
8688         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
8691     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
8692         char *status 
= "+Background append only file rewriting started\r\n"; 
8693         addReplySds(c
,sdsnew(status
)); 
8695         addReply(c
,shared
.err
); 
8699 static void aofRemoveTempFile(pid_t childpid
) { 
8702     snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
); 
8706 /* Virtual Memory is composed mainly of two subsystems: 
8707  * - Blocking Virtual Memory 
8708  * - Threaded Virtual Memory I/O 
8709  * The two parts are not fully decoupled, but functions are split among two 
8710  * different sections of the source code (delimited by comments) in order to 
8711  * make more clear what functionality is about the blocking VM and what about 
8712  * the threaded (not blocking) VM. 
8716  * Redis VM is a blocking VM (one that blocks reading swapped values from 
8717  * disk into memory when a value swapped out is needed in memory) that is made 
8718  * unblocking by trying to examine the command argument vector in order to 
8719  * load in background values that will likely be needed in order to exec 
8720  * the command. The command is executed only once all the relevant keys 
8721  * are loaded into memory. 
8723  * This is basically almost as simple as a blocking VM, but almost as parallel 
8724  * as a fully non-blocking VM. 
8727 /* Called when the user switches from "appendonly yes" to "appendonly no" 
8728  * at runtime using the CONFIG command. */ 
8729 static void stopAppendOnly(void) { 
8730     flushAppendOnlyFile(); 
8731     fsync(server
.appendfd
); 
8732     close(server
.appendfd
); 
8734     server
.appendfd 
= -1; 
8735     server
.appendseldb 
= -1; 
8736     server
.appendonly 
= 0; 
8737     /* rewrite operation in progress? kill it, wait child exit */ 
8738     if (server
.bgsavechildpid 
!= -1) { 
8741         if (kill(server
.bgsavechildpid
,SIGKILL
) != -1) 
8742             wait3(&statloc
,0,NULL
); 
8743         /* reset the buffer accumulating changes while the child saves */ 
8744         sdsfree(server
.bgrewritebuf
); 
8745         server
.bgrewritebuf 
= sdsempty(); 
8746         server
.bgsavechildpid 
= -1; 
8750 /* Called when the user switches from "appendonly no" to "appendonly yes" 
8751  * at runtime using the CONFIG command. */ 
8752 static int startAppendOnly(void) { 
8753     server
.appendonly 
= 1; 
8754     server
.lastfsync 
= time(NULL
); 
8755     server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
8756     if (server
.appendfd 
== -1) { 
8757         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
)); 
8760     if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) { 
8761         server
.appendonly 
= 0; 
8762         close(server
.appendfd
); 
8763         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
)); 
8769 /* =================== Virtual Memory - Blocking Side  ====================== */ 
8771 static void vmInit(void) { 
8777     if (server
.vm_max_threads 
!= 0) 
8778         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
8780     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
8781     /* Try to open the old swap file, otherwise create it */ 
8782     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
8783         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
8785     if (server
.vm_fp 
== NULL
) { 
8786         redisLog(REDIS_WARNING
, 
8787             "Can't open the swap file: %s. Exiting.", 
8791     server
.vm_fd 
= fileno(server
.vm_fp
); 
8792     /* Lock the swap file for writing, this is useful in order to avoid 
8793      * another instance to use the same swap file for a config error. */ 
8794     fl
.l_type 
= F_WRLCK
; 
8795     fl
.l_whence 
= SEEK_SET
; 
8796     fl
.l_start 
= fl
.l_len 
= 0; 
8797     if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) { 
8798         redisLog(REDIS_WARNING
, 
8799             "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
)); 
8803     server
.vm_next_page 
= 0; 
8804     server
.vm_near_pages 
= 0; 
8805     server
.vm_stats_used_pages 
= 0; 
8806     server
.vm_stats_swapped_objects 
= 0; 
8807     server
.vm_stats_swapouts 
= 0; 
8808     server
.vm_stats_swapins 
= 0; 
8809     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
8810     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
8811     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
8812         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
8816         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
8818     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
8819     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
8820         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
8821     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
8823     /* Initialize threaded I/O (used by Virtual Memory) */ 
8824     server
.io_newjobs 
= listCreate(); 
8825     server
.io_processing 
= listCreate(); 
8826     server
.io_processed 
= listCreate(); 
8827     server
.io_ready_clients 
= listCreate(); 
8828     pthread_mutex_init(&server
.io_mutex
,NULL
); 
8829     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
8830     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
8831     server
.io_active_threads 
= 0; 
8832     if (pipe(pipefds
) == -1) { 
8833         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
8837     server
.io_ready_pipe_read 
= pipefds
[0]; 
8838     server
.io_ready_pipe_write 
= pipefds
[1]; 
8839     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
8840     /* LZF requires a lot of stack */ 
8841     pthread_attr_init(&server
.io_threads_attr
); 
8842     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
8843     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
8844     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
8845     /* Listen for events in the threaded I/O pipe */ 
8846     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
8847         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
8848         oom("creating file event"); 
8851 /* Mark the page as used */ 
8852 static void vmMarkPageUsed(off_t page
) { 
8853     off_t byte 
= page
/8; 
8855     redisAssert(vmFreePage(page
) == 1); 
8856     server
.vm_bitmap
[byte
] |= 1<<bit
; 
8859 /* Mark N contiguous pages as used, with 'page' being the first. */ 
8860 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
8863     for (j 
= 0; j 
< count
; j
++) 
8864         vmMarkPageUsed(page
+j
); 
8865     server
.vm_stats_used_pages 
+= count
; 
8866     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
8867         (long long)count
, (long long)page
); 
8870 /* Mark the page as free */ 
8871 static void vmMarkPageFree(off_t page
) { 
8872     off_t byte 
= page
/8; 
8874     redisAssert(vmFreePage(page
) == 0); 
8875     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
8878 /* Mark N contiguous pages as free, with 'page' being the first. */ 
8879 static void vmMarkPagesFree(off_t page
, off_t count
) { 
8882     for (j 
= 0; j 
< count
; j
++) 
8883         vmMarkPageFree(page
+j
); 
8884     server
.vm_stats_used_pages 
-= count
; 
8885     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
8886         (long long)count
, (long long)page
); 
8889 /* Test if the page is free */ 
8890 static int vmFreePage(off_t page
) { 
8891     off_t byte 
= page
/8; 
8893     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
8896 /* Find N contiguous free pages storing the first page of the cluster in *first. 
8897  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise 
8898  * REDIS_ERR is returned. 
8900  * This function uses a simple algorithm: we try to allocate 
8901  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
8902  * again from the start of the swap file searching for free spaces. 
8904  * If it looks pretty clear that there are no free pages near our offset 
8905  * we try to find less populated places doing a forward jump of 
8906  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
8907  * without hurry, and then we jump again and so forth... 
8909  * This function can be improved using a free list to avoid to guess 
8910  * too much, since we could collect data about freed pages. 
8912  * note: I implemented this function just after watching an episode of 
8913  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
8915 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
8916     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
8918     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
8919         server
.vm_near_pages 
= 0; 
8920         server
.vm_next_page 
= 0; 
8922     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
8923     base 
= server
.vm_next_page
; 
8925     while(offset 
< server
.vm_pages
) { 
8926         off_t 
this = base
+offset
; 
8928         /* If we overflow, restart from page zero */ 
8929         if (this >= server
.vm_pages
) { 
8930             this -= server
.vm_pages
; 
8932                 /* Just overflowed, what we found on tail is no longer 
8933                  * interesting, as it's no longer contiguous. */ 
8937         if (vmFreePage(this)) { 
8938             /* This is a free page */ 
8940             /* Already got N free pages? Return to the caller, with success */ 
8942                 *first 
= this-(n
-1); 
8943                 server
.vm_next_page 
= this+1; 
8944                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
8948             /* The current one is not a free page */ 
8952         /* Fast-forward if the current page is not free and we already 
8953          * searched enough near this place. */ 
8955         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
8956             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
8958             /* Note that even if we rewind after the jump, we don't need 
8959              * to make sure numfree is set to zero as we only jump *if* it 
8960              * is set to zero. */ 
8962             /* Otherwise just check the next page */ 
8969 /* Write the specified object at the specified page of the swap file */ 
8970 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
8971     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
8972     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
8973         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8974         redisLog(REDIS_WARNING
, 
8975             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
8979     rdbSaveObject(server
.vm_fp
,o
); 
8980     fflush(server
.vm_fp
); 
8981     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
8985 /* Swap the 'val' object relative to 'key' into disk. Store all the information 
8986  * needed to later retrieve the object into the key object. 
8987  * If we can't find enough contiguous empty pages to swap the object on disk 
8988  * REDIS_ERR is returned. */ 
8989 static int vmSwapObjectBlocking(robj 
*key
, robj 
*val
) { 
8990     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
8993     assert(key
->storage 
== REDIS_VM_MEMORY
); 
8994     assert(key
->refcount 
== 1); 
8995     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
; 
8996     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
; 
8997     key
->vm
.page 
= page
; 
8998     key
->vm
.usedpages 
= pages
; 
8999     key
->storage 
= REDIS_VM_SWAPPED
; 
9000     key
->vtype 
= val
->type
; 
9001     decrRefCount(val
); /* Deallocate the object from memory. */ 
9002     vmMarkPagesUsed(page
,pages
); 
9003     redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)", 
9004         (unsigned char*) key
->ptr
, 
9005         (unsigned long long) page
, (unsigned long long) pages
); 
9006     server
.vm_stats_swapped_objects
++; 
9007     server
.vm_stats_swapouts
++; 
9011 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
9014     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
9015     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
9016         redisLog(REDIS_WARNING
, 
9017             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
9021     o 
= rdbLoadObject(type
,server
.vm_fp
); 
9023         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
9026     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9030 /* Load the value object relative to the 'key' object from swap to memory. 
9031  * The newly allocated object is returned. 
9033  * If preview is true the unserialized object is returned to the caller but 
9034  * no changes are made to the key object, nor the pages are marked as freed */ 
9035 static robj 
*vmGenericLoadObject(robj 
*key
, int preview
) { 
9038     redisAssert(key
->storage 
== REDIS_VM_SWAPPED 
|| key
->storage 
== REDIS_VM_LOADING
); 
9039     val 
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
); 
9041         key
->storage 
= REDIS_VM_MEMORY
; 
9042         key
->vm
.atime 
= server
.unixtime
; 
9043         vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
9044         redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk", 
9045             (unsigned char*) key
->ptr
); 
9046         server
.vm_stats_swapped_objects
--; 
9048         redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk", 
9049             (unsigned char*) key
->ptr
); 
9051     server
.vm_stats_swapins
++; 
9055 /* Plain object loading, from swap to memory */ 
9056 static robj 
*vmLoadObject(robj 
*key
) { 
9057     /* If we are loading the object in background, stop it, we 
9058      * need to load this object synchronously ASAP. */ 
9059     if (key
->storage 
== REDIS_VM_LOADING
) 
9060         vmCancelThreadedIOJob(key
); 
9061     return vmGenericLoadObject(key
,0); 
9064 /* Just load the value on disk, without to modify the key. 
9065  * This is useful when we want to perform some operation on the value 
9066  * without to really bring it from swap to memory, like while saving the 
9067  * dataset or rewriting the append only log. */ 
9068 static robj 
*vmPreviewObject(robj 
*key
) { 
9069     return vmGenericLoadObject(key
,1); 
9072 /* How good a candidate is this object for swapping? 
9073  * The better candidate it is, the greater the returned value. 
9075  * Currently we try to perform a fast estimation of the object size in 
9076  * memory, and combine it with aging information. 
9078  * Basically swappability = idle-time * log(estimated size) 
9080  * Bigger objects are preferred over smaller objects, but not 
9081  * proportionally, this is why we use the logarithm. This algorithm is 
9082  * just a first try and will probably be tuned later. */ 
9083 static double computeObjectSwappability(robj 
*o
) { 
9084     time_t age 
= server
.unixtime 
- o
->vm
.atime
; 
9088     struct dictEntry 
*de
; 
9091     if (age 
<= 0) return 0; 
9094         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
9097             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
9102         listNode 
*ln 
= listFirst(l
); 
9104         asize 
= sizeof(list
); 
9106             robj 
*ele 
= ln
->value
; 
9109             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9110                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
9112             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
9117         z 
= (o
->type 
== REDIS_ZSET
); 
9118         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
9120         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9121         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
9126             de 
= dictGetRandomKey(d
); 
9127             ele 
= dictGetEntryKey(de
); 
9128             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9129                             (sizeof(*o
)+sdslen(ele
->ptr
)) : 
9131             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9132             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
9136         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
9137             unsigned char *p 
= zipmapRewind((unsigned char*)o
->ptr
); 
9138             unsigned int len 
= zipmapLen((unsigned char*)o
->ptr
); 
9139             unsigned int klen
, vlen
; 
9140             unsigned char *key
, *val
; 
9142             if ((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) { 
9146             asize 
= len
*(klen
+vlen
+3); 
9147         } else if (o
->encoding 
== REDIS_ENCODING_HT
) { 
9149             asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9154                 de 
= dictGetRandomKey(d
); 
9155                 ele 
= dictGetEntryKey(de
); 
9156                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9157                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : 
9159                 ele 
= dictGetEntryVal(de
); 
9160                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9161                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : 
9163                 asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9168     return (double)age
*log(1+asize
); 
9171 /* Try to swap an object that's a good candidate for swapping. 
9172  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
9173  * to swap any object at all. 
9175  * If 'usethreads' is true, Redis will try to swap the object in background 
9176  * using I/O threads. */ 
9177 static int vmSwapOneObject(int usethreads
) { 
9179     struct dictEntry 
*best 
= NULL
; 
9180     double best_swappability 
= 0; 
9181     redisDb 
*best_db 
= NULL
; 
9184     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
9185         redisDb 
*db 
= server
.db
+j
; 
9186         /* Why maxtries is set to 100? 
9187          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
9188          * are swappable objects */ 
9191         if (dictSize(db
->dict
) == 0) continue; 
9192         for (i 
= 0; i 
< 5; i
++) { 
9194             double swappability
; 
9196             if (maxtries
) maxtries
--; 
9197             de 
= dictGetRandomKey(db
->dict
); 
9198             key 
= dictGetEntryKey(de
); 
9199             val 
= dictGetEntryVal(de
); 
9200             /* Only swap objects that are currently in memory. 
9202              * Also don't swap shared objects if threaded VM is on, as we 
9203              * try to ensure that the main thread does not touch the 
9204              * object while the I/O thread is using it, but we can't 
9205              * control other keys without adding additional mutex. */ 
9206             if (key
->storage 
!= REDIS_VM_MEMORY 
|| 
9207                 (server
.vm_max_threads 
!= 0 && val
->refcount 
!= 1)) { 
9208                 if (maxtries
) i
--; /* don't count this try */ 
9211             swappability 
= computeObjectSwappability(val
); 
9212             if (!best 
|| swappability 
> best_swappability
) { 
9214                 best_swappability 
= swappability
; 
9219     if (best 
== NULL
) return REDIS_ERR
; 
9220     key 
= dictGetEntryKey(best
); 
9221     val 
= dictGetEntryVal(best
); 
9223     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
9224         key
->ptr
, best_swappability
); 
9226     /* Unshare the key if needed */ 
9227     if (key
->refcount 
> 1) { 
9228         robj 
*newkey 
= dupStringObject(key
); 
9230         key 
= dictGetEntryKey(best
) = newkey
; 
9234         vmSwapObjectThreaded(key
,val
,best_db
); 
9237         if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
9238             dictGetEntryVal(best
) = NULL
; 
/* Try to swap out one object synchronously.
 * Thin wrapper calling vmSwapOneObject() with threaded I/O disabled;
 * returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap out one object using the I/O threads.
 * Thin wrapper calling vmSwapOneObject() with threaded I/O enabled;
 * returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9254 /* Return true if it's safe to swap out objects in a given moment. 
9255  * Basically we don't want to swap objects out while there is a BGSAVE 
9256  * or a BGAEOREWRITE running in backgroud. */ 
9257 static int vmCanSwapOut(void) { 
9258     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
9261 /* Delete a key if swapped. Returns 1 if the key was found, was swapped 
9262  * and was deleted. Otherwise 0 is returned. */ 
9263 static int deleteIfSwapped(redisDb 
*db
, robj 
*key
) { 
9267     if ((de 
= dictFind(db
->dict
,key
)) == NULL
) return 0; 
9268     foundkey 
= dictGetEntryKey(de
); 
9269     if (foundkey
->storage 
== REDIS_VM_MEMORY
) return 0; 
9274 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
9276 static void freeIOJob(iojob 
*j
) { 
9277     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
9278         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
9279         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
9280         decrRefCount(j
->val
); 
9281     /* We don't decrRefCount the j->key field as we didn't increment 
9282      * the count when creating IO Jobs. This is because the key field here is 
9283      * just used as an identifier and if a key is removed the Job should 
9284      * never be touched again. */ 
9288 /* Every time a thread finished a Job, it writes a byte into the write side 
9289  * of an unix pipe in order to "awake" the main thread, and this function 
9291 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
9295     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
9297     REDIS_NOTUSED(mask
); 
9298     REDIS_NOTUSED(privdata
); 
9300     /* For every byte we read in the read side of the pipe, there is one 
9301      * I/O job completed to process. */ 
9302     while((retval 
= read(fd
,buf
,1)) == 1) { 
9306         struct dictEntry 
*de
; 
9308         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
9310         /* Get the processed element (the oldest one) */ 
9312         assert(listLength(server
.io_processed
) != 0); 
9313         if (toprocess 
== -1) { 
9314             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
9315             if (toprocess 
<= 0) toprocess 
= 1; 
9317         ln 
= listFirst(server
.io_processed
); 
9319         listDelNode(server
.io_processed
,ln
); 
9321         /* If this job is marked as canceled, just ignore it */ 
9326         /* Post process it in the main thread, as there are things we 
9327          * can do just here to avoid race conditions and/or invasive locks */ 
9328         redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
); 
9329         de 
= dictFind(j
->db
->dict
,j
->key
); 
9331         key 
= dictGetEntryKey(de
); 
9332         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
9335             /* Key loaded, bring it at home */ 
9336             key
->storage 
= REDIS_VM_MEMORY
; 
9337             key
->vm
.atime 
= server
.unixtime
; 
9338             vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
); 
9339             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
9340                 (unsigned char*) key
->ptr
); 
9341             server
.vm_stats_swapped_objects
--; 
9342             server
.vm_stats_swapins
++; 
9343             dictGetEntryVal(de
) = j
->val
; 
9344             incrRefCount(j
->val
); 
9347             /* Handle clients waiting for this key to be loaded. */ 
9348             handleClientsBlockedOnSwappedKey(db
,key
); 
9349         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
9350             /* Now we know the amount of pages required to swap this object. 
9351              * Let's find some space for it, and queue this task again 
9352              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
9353             if (!vmCanSwapOut() || 
9354                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
9356                 /* Ooops... no space or we can't swap as there is 
9357                  * a fork()ed Redis trying to save stuff on disk. */ 
9359                 key
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
9361                 /* Note that we need to mark this pages as used now, 
9362                  * if the job will be canceled, we'll mark them as freed 
9364                 vmMarkPagesUsed(j
->page
,j
->pages
); 
9365                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
9370         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
9373             /* Key swapped. We can finally free some memory. */ 
9374             if (key
->storage 
!= REDIS_VM_SWAPPING
) { 
9375                 printf("key->storage: %d\n",key
->storage
); 
9376                 printf("key->name: %s\n",(char*)key
->ptr
); 
9377                 printf("key->refcount: %d\n",key
->refcount
); 
9378                 printf("val: %p\n",(void*)j
->val
); 
9379                 printf("val->type: %d\n",j
->val
->type
); 
9380                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
9382             redisAssert(key
->storage 
== REDIS_VM_SWAPPING
); 
9383             val 
= dictGetEntryVal(de
); 
9384             key
->vm
.page 
= j
->page
; 
9385             key
->vm
.usedpages 
= j
->pages
; 
9386             key
->storage 
= REDIS_VM_SWAPPED
; 
9387             key
->vtype 
= j
->val
->type
; 
9388             decrRefCount(val
); /* Deallocate the object from memory. */ 
9389             dictGetEntryVal(de
) = NULL
; 
9390             redisLog(REDIS_DEBUG
, 
9391                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
9392                 (unsigned char*) key
->ptr
, 
9393                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
9394             server
.vm_stats_swapped_objects
++; 
9395             server
.vm_stats_swapouts
++; 
9397             /* Put a few more swap requests in queue if we are still 
9399             if (trytoswap 
&& vmCanSwapOut() && 
9400                 zmalloc_used_memory() > server
.vm_max_memory
) 
9405                     more 
= listLength(server
.io_newjobs
) < 
9406                             (unsigned) server
.vm_max_threads
; 
9408                     /* Don't waste CPU time if swappable objects are rare. */ 
9409                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
9417         if (processed 
== toprocess
) return; 
9419     if (retval 
< 0 && errno 
!= EAGAIN
) { 
9420         redisLog(REDIS_WARNING
, 
9421             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
9426 static void lockThreadedIO(void) { 
9427     pthread_mutex_lock(&server
.io_mutex
); 
9430 static void unlockThreadedIO(void) { 
9431     pthread_mutex_unlock(&server
.io_mutex
); 
9434 /* Remove the specified object from the threaded I/O queue if still not 
9435  * processed, otherwise make sure to flag it as canceled. */ 
9436 static void vmCancelThreadedIOJob(robj 
*o
) { 
9438         server
.io_newjobs
,      /* 0 */ 
9439         server
.io_processing
,   /* 1 */ 
9440         server
.io_processed     
/* 2 */ 
9444     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
9447     /* Search for a matching key in one of the queues */ 
9448     for (i 
= 0; i 
< 3; i
++) { 
9452         listRewind(lists
[i
],&li
); 
9453         while ((ln 
= listNext(&li
)) != NULL
) { 
9454             iojob 
*job 
= ln
->value
; 
9456             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
9457             if (job
->key 
== o
) { 
9458                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n", 
9459                     (void*)job
, (char*)o
->ptr
, job
->type
, i
); 
9460                 /* Mark the pages as free since the swap didn't happened 
9461                  * or happened but is now discarded. */ 
9462                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
9463                     vmMarkPagesFree(job
->page
,job
->pages
); 
9464                 /* Cancel the job. It depends on the list the job is 
9467                 case 0: /* io_newjobs */ 
9468                     /* If the job was yet not processed the best thing to do 
9469                      * is to remove it from the queue at all */ 
9471                     listDelNode(lists
[i
],ln
); 
9473                 case 1: /* io_processing */ 
9474                     /* Oh Shi- the thread is messing with the Job: 
9476                      * Probably it's accessing the object if this is a 
9477                      * PREPARE_SWAP or DO_SWAP job. 
9478                      * If it's a LOAD job it may be reading from disk and 
9479                      * if we don't wait for the job to terminate before to 
9480                      * cancel it, maybe in a few microseconds data can be 
9481                      * corrupted in this pages. So the short story is: 
9483                      * Better to wait for the job to move into the 
9484                      * next queue (processed)... */ 
9486                     /* We try again and again until the job is completed. */ 
9488                     /* But let's wait some time for the I/O thread 
9489                      * to finish with this job. After all this condition 
9490                      * should be very rare. */ 
9493                 case 2: /* io_processed */ 
9494                     /* The job was already processed, that's easy... 
9495                      * just mark it as canceled so that we'll ignore it 
9496                      * when processing completed jobs. */ 
9500                 /* Finally we have to adjust the storage type of the object 
9501                  * in order to "UNDO" the operaiton. */ 
9502                 if (o
->storage 
== REDIS_VM_LOADING
) 
9503                     o
->storage 
= REDIS_VM_SWAPPED
; 
9504                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
9505                     o
->storage 
= REDIS_VM_MEMORY
; 
9512     assert(1 != 1); /* We should never reach this */ 
9515 static void *IOThreadEntryPoint(void *arg
) { 
9520     pthread_detach(pthread_self()); 
9522         /* Get a new job to process */ 
9524         if (listLength(server
.io_newjobs
) == 0) { 
9525             /* No new jobs in queue, exit. */ 
9526             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
9527                 (long) pthread_self()); 
9528             server
.io_active_threads
--; 
9532         ln 
= listFirst(server
.io_newjobs
); 
9534         listDelNode(server
.io_newjobs
,ln
); 
9535         /* Add the job in the processing queue */ 
9536         j
->thread 
= pthread_self(); 
9537         listAddNodeTail(server
.io_processing
,j
); 
9538         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
9540         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
9541             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
9543         /* Process the Job */ 
9544         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
9545             j
->val 
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
); 
9546         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
9547             FILE *fp 
= fopen("/dev/null","w+"); 
9548             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
9550         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
9551             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
9555         /* Done: insert the job into the processed queue */ 
9556         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
9557             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
9559         listDelNode(server
.io_processing
,ln
); 
9560         listAddNodeTail(server
.io_processed
,j
); 
9563         /* Signal the main thread there is new stuff to process */ 
9564         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
9566     return NULL
; /* never reached */ 
9569 static void spawnIOThread(void) { 
9571     sigset_t mask
, omask
; 
9575     sigaddset(&mask
,SIGCHLD
); 
9576     sigaddset(&mask
,SIGHUP
); 
9577     sigaddset(&mask
,SIGPIPE
); 
9578     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
9579     while ((err 
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) { 
9580         redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s", 
9584     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
9585     server
.io_active_threads
++; 
9588 /* We need to wait for the last thread to exit before we are able to 
9589  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
9590 static void waitEmptyIOJobsQueue(void) { 
9592         int io_processed_len
; 
9595         if (listLength(server
.io_newjobs
) == 0 && 
9596             listLength(server
.io_processing
) == 0 && 
9597             server
.io_active_threads 
== 0) 
9602         /* While waiting for empty jobs queue condition we post-process some 
9603          * finshed job, as I/O threads may be hanging trying to write against 
9604          * the io_ready_pipe_write FD but there are so much pending jobs that 
9606         io_processed_len 
= listLength(server
.io_processed
); 
9608         if (io_processed_len
) { 
9609             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
9610             usleep(1000); /* 1 millisecond */ 
9612             usleep(10000); /* 10 milliseconds */ 
9617 static void vmReopenSwapFile(void) { 
9618     /* Note: we don't close the old one as we are in the child process 
9619      * and don't want to mess at all with the original file object. */ 
9620     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
9621     if (server
.vm_fp 
== NULL
) { 
9622         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
9623             server
.vm_swap_file
); 
9626     server
.vm_fd 
= fileno(server
.vm_fp
); 
9629 /* This function must be called while with threaded IO locked */ 
9630 static void queueIOJob(iojob 
*j
) { 
9631     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
9632         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
9633     listAddNodeTail(server
.io_newjobs
,j
); 
9634     if (server
.io_active_threads 
< server
.vm_max_threads
) 
9638 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
9641     assert(key
->storage 
== REDIS_VM_MEMORY
); 
9642     assert(key
->refcount 
== 1); 
9644     j 
= zmalloc(sizeof(*j
)); 
9645     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
9651     j
->thread 
= (pthread_t
) -1; 
9652     key
->storage 
= REDIS_VM_SWAPPING
; 
9660 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
9662 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded. 
9663  * If there is not already a job loading the key, it is craeted. 
9664  * The key is added to the io_keys list in the client structure, and also 
9665  * in the hash table mapping swapped keys to waiting clients, that is, 
9666  * server.io_waited_keys. */ 
9667 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
9668     struct dictEntry 
*de
; 
9672     /* If the key does not exist or is already in RAM we don't need to 
9673      * block the client at all. */ 
9674     de 
= dictFind(c
->db
->dict
,key
); 
9675     if (de 
== NULL
) return 0; 
9676     o 
= dictGetEntryKey(de
); 
9677     if (o
->storage 
== REDIS_VM_MEMORY
) { 
9679     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
9680         /* We were swapping the key, undo it! */ 
9681         vmCancelThreadedIOJob(o
); 
9685     /* OK: the key is either swapped, or being loaded just now. */ 
9687     /* Add the key to the list of keys this client is waiting for. 
9688      * This maps clients to keys they are waiting for. */ 
9689     listAddNodeTail(c
->io_keys
,key
); 
9692     /* Add the client to the swapped keys => clients waiting map. */ 
9693     de 
= dictFind(c
->db
->io_keys
,key
); 
9697         /* For every key we take a list of clients blocked for it */ 
9699         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
9701         assert(retval 
== DICT_OK
); 
9703         l 
= dictGetEntryVal(de
); 
9705     listAddNodeTail(l
,c
); 
9707     /* Are we already loading the key from disk? If not create a job */ 
9708     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
9711         o
->storage 
= REDIS_VM_LOADING
; 
9712         j 
= zmalloc(sizeof(*j
)); 
9713         j
->type 
= REDIS_IOJOB_LOAD
; 
9716         j
->key
->vtype 
= o
->vtype
; 
9717         j
->page 
= o
->vm
.page
; 
9720         j
->thread 
= (pthread_t
) -1; 
9728 /* Preload keys for any command with first, last and step values for 
9729  * the command keys prototype, as defined in the command table. */ 
9730 static void waitForMultipleSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
9732     if (cmd
->vm_firstkey 
== 0) return; 
9733     last 
= cmd
->vm_lastkey
; 
9734     if (last 
< 0) last 
= argc
+last
; 
9735     for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) { 
9736         redisAssert(j 
< argc
); 
9737         waitForSwappedKey(c
,argv
[j
]); 
9741 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands. 
9742  * Note that the number of keys to preload is user-defined, so we need to 
9743  * apply a sanity check against argc. */ 
9744 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
9748     num 
= atoi(argv
[2]->ptr
); 
9749     if (num 
> (argc
-3)) return; 
9750     for (i 
= 0; i 
< num
; i
++) { 
9751         waitForSwappedKey(c
,argv
[3+i
]); 
9755 /* Preload keys needed to execute the entire MULTI/EXEC block. 
9757  * This function is called by blockClientOnSwappedKeys when EXEC is issued, 
9758  * and will block the client when any command requires a swapped out value. */ 
9759 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
9761     struct redisCommand 
*mcmd
; 
9764     REDIS_NOTUSED(argc
); 
9765     REDIS_NOTUSED(argv
); 
9767     if (!(c
->flags 
& REDIS_MULTI
)) return; 
9768     for (i 
= 0; i 
< c
->mstate
.count
; i
++) { 
9769         mcmd 
= c
->mstate
.commands
[i
].cmd
; 
9770         margc 
= c
->mstate
.commands
[i
].argc
; 
9771         margv 
= c
->mstate
.commands
[i
].argv
; 
9773         if (mcmd
->vm_preload_proc 
!= NULL
) { 
9774             mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
); 
9776             waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
); 
9781 /* Is this client attempting to run a command against swapped keys? 
9782  * If so, block it ASAP, load the keys in background, then resume it. 
9784  * The important idea about this function is that it can fail! If keys will 
9785  * still be swapped when the client is resumed, this key lookups will 
9786  * just block loading keys from disk. In practical terms this should only 
9787  * happen with SORT BY command or if there is a bug in this function. 
9789  * Return 1 if the client is marked as blocked, 0 if the client can 
9790  * continue as the keys it is going to access appear to be in memory. */ 
9791 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
) { 
9792     if (cmd
->vm_preload_proc 
!= NULL
) { 
9793         cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
); 
9795         waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
); 
9798     /* If the client was blocked for at least one key, mark it as blocked. */ 
9799     if (listLength(c
->io_keys
)) { 
9800         c
->flags 
|= REDIS_IO_WAIT
; 
9801         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
9802         server
.vm_blocked_clients
++; 
9809 /* Remove the 'key' from the list of blocked keys for a given client. 
9811  * The function returns 1 when there are no longer blocking keys after 
9812  * the current one was removed (and the client can be unblocked). */ 
9813 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
9817     struct dictEntry 
*de
; 
9819     /* Remove the key from the list of keys this client is waiting for. */ 
9820     listRewind(c
->io_keys
,&li
); 
9821     while ((ln 
= listNext(&li
)) != NULL
) { 
9822         if (equalStringObjects(ln
->value
,key
)) { 
9823             listDelNode(c
->io_keys
,ln
); 
9829     /* Remove the client form the key => waiting clients map. */ 
9830     de 
= dictFind(c
->db
->io_keys
,key
); 
9832     l 
= dictGetEntryVal(de
); 
9833     ln 
= listSearchKey(l
,c
); 
9836     if (listLength(l
) == 0) 
9837         dictDelete(c
->db
->io_keys
,key
); 
9839     return listLength(c
->io_keys
) == 0; 
9842 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
9843     struct dictEntry 
*de
; 
9848     de 
= dictFind(db
->io_keys
,key
); 
9851     l 
= dictGetEntryVal(de
); 
9852     len 
= listLength(l
); 
9853     /* Note: we can't use something like while(listLength(l)) as the list 
9854      * can be freed by the calling function when we remove the last element. */ 
9857         redisClient 
*c 
= ln
->value
; 
9859         if (dontWaitForSwappedKey(c
,key
)) { 
9860             /* Put the client in the list of clients ready to go as we 
9861              * loaded all the keys about it. */ 
9862             listAddNodeTail(server
.io_ready_clients
,c
); 
9867 /* =========================== Remote Configuration ========================= */ 
9869 static void configSetCommand(redisClient 
*c
) { 
9870     robj 
*o 
= getDecodedObject(c
->argv
[3]); 
9873     if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) { 
9874         zfree(server
.dbfilename
); 
9875         server
.dbfilename 
= zstrdup(o
->ptr
); 
9876     } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) { 
9877         zfree(server
.requirepass
); 
9878         server
.requirepass 
= zstrdup(o
->ptr
); 
9879     } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) { 
9880         zfree(server
.masterauth
); 
9881         server
.masterauth 
= zstrdup(o
->ptr
); 
9882     } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) { 
9883         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
9884             ll 
< 0) goto badfmt
; 
9885         server
.maxmemory 
= ll
; 
9886     } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) { 
9887         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
9888             ll 
< 0 || ll 
> LONG_MAX
) goto badfmt
; 
9889         server
.maxidletime 
= ll
; 
9890     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) { 
9891         if (!strcasecmp(o
->ptr
,"no")) { 
9892             server
.appendfsync 
= APPENDFSYNC_NO
; 
9893         } else if (!strcasecmp(o
->ptr
,"everysec")) { 
9894             server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
9895         } else if (!strcasecmp(o
->ptr
,"always")) { 
9896             server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
9900     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) { 
9901         int old 
= server
.appendonly
; 
9902         int new = yesnotoi(o
->ptr
); 
9904         if (new == -1) goto badfmt
; 
9909                 if (startAppendOnly() == REDIS_ERR
) { 
9910                     addReplySds(c
,sdscatprintf(sdsempty(), 
9911                         "-ERR Unable to turn on AOF. Check server logs.\r\n")); 
9917     } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) { 
9919         sds 
*v 
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
); 
9921         /* Perform sanity check before setting the new config: 
9922          * - Even number of args 
9923          * - Seconds >= 1, changes >= 0 */ 
9925             sdsfreesplitres(v
,vlen
); 
9928         for (j 
= 0; j 
< vlen
; j
++) { 
9932             val 
= strtoll(v
[j
], &eptr
, 10); 
9933             if (eptr
[0] != '\0' || 
9934                 ((j 
& 1) == 0 && val 
< 1) || 
9935                 ((j 
& 1) == 1 && val 
< 0)) { 
9936                 sdsfreesplitres(v
,vlen
); 
9940         /* Finally set the new config */ 
9941         resetServerSaveParams(); 
9942         for (j 
= 0; j 
< vlen
; j 
+= 2) { 
9946             seconds 
= strtoll(v
[j
],NULL
,10); 
9947             changes 
= strtoll(v
[j
+1],NULL
,10); 
9948             appendServerSaveParams(seconds
, changes
); 
9950         sdsfreesplitres(v
,vlen
); 
9952         addReplySds(c
,sdscatprintf(sdsempty(), 
9953             "-ERR not supported CONFIG parameter %s\r\n", 
9954             (char*)c
->argv
[2]->ptr
)); 
9959     addReply(c
,shared
.ok
); 
9962 badfmt
: /* Bad format errors */ 
9963     addReplySds(c
,sdscatprintf(sdsempty(), 
9964         "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n", 
9966             (char*)c
->argv
[2]->ptr
)); 
9970 static void configGetCommand(redisClient 
*c
) { 
9971     robj 
*o 
= getDecodedObject(c
->argv
[2]); 
9972     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
9973     char *pattern 
= o
->ptr
; 
9977     decrRefCount(lenobj
); 
9979     if (stringmatch(pattern
,"dbfilename",0)) { 
9980         addReplyBulkCString(c
,"dbfilename"); 
9981         addReplyBulkCString(c
,server
.dbfilename
); 
9984     if (stringmatch(pattern
,"requirepass",0)) { 
9985         addReplyBulkCString(c
,"requirepass"); 
9986         addReplyBulkCString(c
,server
.requirepass
); 
9989     if (stringmatch(pattern
,"masterauth",0)) { 
9990         addReplyBulkCString(c
,"masterauth"); 
9991         addReplyBulkCString(c
,server
.masterauth
); 
9994     if (stringmatch(pattern
,"maxmemory",0)) { 
9997         ll2string(buf
,128,server
.maxmemory
); 
9998         addReplyBulkCString(c
,"maxmemory"); 
9999         addReplyBulkCString(c
,buf
); 
10002     if (stringmatch(pattern
,"timeout",0)) { 
10005         ll2string(buf
,128,server
.maxidletime
); 
10006         addReplyBulkCString(c
,"timeout"); 
10007         addReplyBulkCString(c
,buf
); 
10010     if (stringmatch(pattern
,"appendonly",0)) { 
10011         addReplyBulkCString(c
,"appendonly"); 
10012         addReplyBulkCString(c
,server
.appendonly 
? "yes" : "no"); 
10015     if (stringmatch(pattern
,"appendfsync",0)) { 
10018         switch(server
.appendfsync
) { 
10019         case APPENDFSYNC_NO
: policy 
= "no"; break; 
10020         case APPENDFSYNC_EVERYSEC
: policy 
= "everysec"; break; 
10021         case APPENDFSYNC_ALWAYS
: policy 
= "always"; break; 
10022         default: policy 
= "unknown"; break; /* too harmless to panic */ 
10024         addReplyBulkCString(c
,"appendfsync"); 
10025         addReplyBulkCString(c
,policy
); 
10028     if (stringmatch(pattern
,"save",0)) { 
10029         sds buf 
= sdsempty(); 
10032         for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
10033             buf 
= sdscatprintf(buf
,"%ld %d", 
10034                     server
.saveparams
[j
].seconds
, 
10035                     server
.saveparams
[j
].changes
); 
10036             if (j 
!= server
.saveparamslen
-1) 
10037                 buf 
= sdscatlen(buf
," ",1); 
10039         addReplyBulkCString(c
,"save"); 
10040         addReplyBulkCString(c
,buf
); 
10045     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2); 
10048 static void configCommand(redisClient 
*c
) { 
10049     if (!strcasecmp(c
->argv
[1]->ptr
,"set")) { 
10050         if (c
->argc 
!= 4) goto badarity
; 
10051         configSetCommand(c
); 
10052     } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) { 
10053         if (c
->argc 
!= 3) goto badarity
; 
10054         configGetCommand(c
); 
10055     } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) { 
10056         if (c
->argc 
!= 2) goto badarity
; 
10057         server
.stat_numcommands 
= 0; 
10058         server
.stat_numconnections 
= 0; 
10059         server
.stat_expiredkeys 
= 0; 
10060         server
.stat_starttime 
= time(NULL
); 
10061         addReply(c
,shared
.ok
); 
10063         addReplySds(c
,sdscatprintf(sdsempty(), 
10064             "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n")); 
10069     addReplySds(c
,sdscatprintf(sdsempty(), 
10070         "-ERR Wrong number of arguments for CONFIG %s\r\n", 
10071         (char*) c
->argv
[1]->ptr
)); 
10074 /* =========================== Pubsub implementation ======================== */ 
10076 static void freePubsubPattern(void *p
) { 
10077     pubsubPattern 
*pat 
= p
; 
10079     decrRefCount(pat
->pattern
); 
10083 static int listMatchPubsubPattern(void *a
, void *b
) { 
10084     pubsubPattern 
*pa 
= a
, *pb 
= b
; 
10086     return (pa
->client 
== pb
->client
) && 
10087            (equalStringObjects(pa
->pattern
,pb
->pattern
)); 
10090 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or 
10091  * 0 if the client was already subscribed to that channel. */ 
10092 static int pubsubSubscribeChannel(redisClient 
*c
, robj 
*channel
) { 
10093     struct dictEntry 
*de
; 
10094     list 
*clients 
= NULL
; 
10097     /* Add the channel to the client -> channels hash table */ 
10098     if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) { 
10100         incrRefCount(channel
); 
10101         /* Add the client to the channel -> list of clients hash table */ 
10102         de 
= dictFind(server
.pubsub_channels
,channel
); 
10104             clients 
= listCreate(); 
10105             dictAdd(server
.pubsub_channels
,channel
,clients
); 
10106             incrRefCount(channel
); 
10108             clients 
= dictGetEntryVal(de
); 
10110         listAddNodeTail(clients
,c
); 
10112     /* Notify the client */ 
10113     addReply(c
,shared
.mbulk3
); 
10114     addReply(c
,shared
.subscribebulk
); 
10115     addReplyBulk(c
,channel
); 
10116     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10120 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
10121  * 0 if the client was not subscribed to the specified channel. */ 
10122 static int pubsubUnsubscribeChannel(redisClient 
*c
, robj 
*channel
, int notify
) { 
10123     struct dictEntry 
*de
; 
10128     /* Remove the channel from the client -> channels hash table */ 
10129     incrRefCount(channel
); /* channel may be just a pointer to the same object 
10130                             we have in the hash tables. Protect it... */ 
10131     if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) { 
10133         /* Remove the client from the channel -> clients list hash table */ 
10134         de 
= dictFind(server
.pubsub_channels
,channel
); 
10135         assert(de 
!= NULL
); 
10136         clients 
= dictGetEntryVal(de
); 
10137         ln 
= listSearchKey(clients
,c
); 
10138         assert(ln 
!= NULL
); 
10139         listDelNode(clients
,ln
); 
10140         if (listLength(clients
) == 0) { 
10141             /* Free the list and associated hash entry at all if this was 
10142              * the latest client, so that it will be possible to abuse 
10143              * Redis PUBSUB creating millions of channels. */ 
10144             dictDelete(server
.pubsub_channels
,channel
); 
10147     /* Notify the client */ 
10149         addReply(c
,shared
.mbulk3
); 
10150         addReply(c
,shared
.unsubscribebulk
); 
10151         addReplyBulk(c
,channel
); 
10152         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10153                        listLength(c
->pubsub_patterns
)); 
10156     decrRefCount(channel
); /* it is finally safe to release it */ 
10160 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */ 
10161 static int pubsubSubscribePattern(redisClient 
*c
, robj 
*pattern
) { 
10164     if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) { 
10166         pubsubPattern 
*pat
; 
10167         listAddNodeTail(c
->pubsub_patterns
,pattern
); 
10168         incrRefCount(pattern
); 
10169         pat 
= zmalloc(sizeof(*pat
)); 
10170         pat
->pattern 
= getDecodedObject(pattern
); 
10172         listAddNodeTail(server
.pubsub_patterns
,pat
); 
10174     /* Notify the client */ 
10175     addReply(c
,shared
.mbulk3
); 
10176     addReply(c
,shared
.psubscribebulk
); 
10177     addReplyBulk(c
,pattern
); 
10178     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10182 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
10183  * 0 if the client was not subscribed to the specified channel. */ 
10184 static int pubsubUnsubscribePattern(redisClient 
*c
, robj 
*pattern
, int notify
) { 
10189     incrRefCount(pattern
); /* Protect the object. May be the same we remove */ 
10190     if ((ln 
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) { 
10192         listDelNode(c
->pubsub_patterns
,ln
); 
10194         pat
.pattern 
= pattern
; 
10195         ln 
= listSearchKey(server
.pubsub_patterns
,&pat
); 
10196         listDelNode(server
.pubsub_patterns
,ln
); 
10198     /* Notify the client */ 
10200         addReply(c
,shared
.mbulk3
); 
10201         addReply(c
,shared
.punsubscribebulk
); 
10202         addReplyBulk(c
,pattern
); 
10203         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10204                        listLength(c
->pubsub_patterns
)); 
10206     decrRefCount(pattern
); 
10210 /* Unsubscribe from all the channels. Return the number of channels the 
10211  * client was subscribed from. */ 
10212 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
) { 
10213     dictIterator 
*di 
= dictGetIterator(c
->pubsub_channels
); 
10217     while((de 
= dictNext(di
)) != NULL
) { 
10218         robj 
*channel 
= dictGetEntryKey(de
); 
10220         count 
+= pubsubUnsubscribeChannel(c
,channel
,notify
); 
10222     dictReleaseIterator(di
); 
10226 /* Unsubscribe from all the patterns. Return the number of patterns the 
10227  * client was subscribed from. */ 
10228 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
) { 
10233     listRewind(c
->pubsub_patterns
,&li
); 
10234     while ((ln 
= listNext(&li
)) != NULL
) { 
10235         robj 
*pattern 
= ln
->value
; 
10237         count 
+= pubsubUnsubscribePattern(c
,pattern
,notify
); 
10242 /* Publish a message */ 
10243 static int pubsubPublishMessage(robj 
*channel
, robj 
*message
) { 
10245     struct dictEntry 
*de
; 
10249     /* Send to clients listening for that channel */ 
10250     de 
= dictFind(server
.pubsub_channels
,channel
); 
10252         list 
*list 
= dictGetEntryVal(de
); 
10256         listRewind(list
,&li
); 
10257         while ((ln 
= listNext(&li
)) != NULL
) { 
10258             redisClient 
*c 
= ln
->value
; 
10260             addReply(c
,shared
.mbulk3
); 
10261             addReply(c
,shared
.messagebulk
); 
10262             addReplyBulk(c
,channel
); 
10263             addReplyBulk(c
,message
); 
10267     /* Send to clients listening to matching channels */ 
10268     if (listLength(server
.pubsub_patterns
)) { 
10269         listRewind(server
.pubsub_patterns
,&li
); 
10270         channel 
= getDecodedObject(channel
); 
10271         while ((ln 
= listNext(&li
)) != NULL
) { 
10272             pubsubPattern 
*pat 
= ln
->value
; 
10274             if (stringmatchlen((char*)pat
->pattern
->ptr
, 
10275                                 sdslen(pat
->pattern
->ptr
), 
10276                                 (char*)channel
->ptr
, 
10277                                 sdslen(channel
->ptr
),0)) { 
10278                 addReply(pat
->client
,shared
.mbulk4
); 
10279                 addReply(pat
->client
,shared
.pmessagebulk
); 
10280                 addReplyBulk(pat
->client
,pat
->pattern
); 
10281                 addReplyBulk(pat
->client
,channel
); 
10282                 addReplyBulk(pat
->client
,message
); 
10286         decrRefCount(channel
); 
10291 static void subscribeCommand(redisClient 
*c
) { 
10294     for (j 
= 1; j 
< c
->argc
; j
++) 
10295         pubsubSubscribeChannel(c
,c
->argv
[j
]); 
10298 static void unsubscribeCommand(redisClient 
*c
) { 
10299     if (c
->argc 
== 1) { 
10300         pubsubUnsubscribeAllChannels(c
,1); 
10305         for (j 
= 1; j 
< c
->argc
; j
++) 
10306             pubsubUnsubscribeChannel(c
,c
->argv
[j
],1); 
10310 static void psubscribeCommand(redisClient 
*c
) { 
10313     for (j 
= 1; j 
< c
->argc
; j
++) 
10314         pubsubSubscribePattern(c
,c
->argv
[j
]); 
10317 static void punsubscribeCommand(redisClient 
*c
) { 
10318     if (c
->argc 
== 1) { 
10319         pubsubUnsubscribeAllPatterns(c
,1); 
10324         for (j 
= 1; j 
< c
->argc
; j
++) 
10325             pubsubUnsubscribePattern(c
,c
->argv
[j
],1); 
10329 static void publishCommand(redisClient 
*c
) { 
10330     int receivers 
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]); 
10331     addReplyLongLong(c
,receivers
); 
10334 /* ================================= Debugging ============================== */ 
10336 /* Compute the sha1 of string at 's' with 'len' bytes long. 
10337  * The SHA1 is then xored againt the string pointed by digest. 
10338  * Since xor is commutative, this operation is used in order to 
10339  * "add" digests relative to unordered elements. 
10341  * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */ 
10342 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
10344     unsigned char hash
[20], *s 
= ptr
; 
10348     SHA1Update(&ctx
,s
,len
); 
10349     SHA1Final(hash
,&ctx
); 
10351     for (j 
= 0; j 
< 20; j
++) 
10352         digest
[j
] ^= hash
[j
]; 
10355 static void xorObjectDigest(unsigned char *digest
, robj 
*o
) { 
10356     o 
= getDecodedObject(o
); 
10357     xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
10361 /* This function instead of just computing the SHA1 and xoring it 
10362  * against diget, also perform the digest of "digest" itself and 
10363  * replace the old value with the new one. 
10365  * So the final digest will be: 
10367  * digest = SHA1(digest xor SHA1(data)) 
10369  * This function is used every time we want to preserve the order so 
10370  * that digest(a,b,c,d) will be different than digest(b,c,d,a) 
10372  * Also note that mixdigest("foo") followed by mixdigest("bar") 
10373  * will lead to a different digest compared to "fo", "obar". 
10375 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
10379     xorDigest(digest
,s
,len
); 
10381     SHA1Update(&ctx
,digest
,20); 
10382     SHA1Final(digest
,&ctx
); 
10385 static void mixObjectDigest(unsigned char *digest
, robj 
*o
) { 
10386     o 
= getDecodedObject(o
); 
10387     mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
10391 /* Compute the dataset digest. Since keys, sets elements, hashes elements 
10392  * are not ordered, we use a trick: every aggregate digest is the xor 
10393  * of the digests of their elements. This way the order will not change 
10394  * the result. For list instead we use a feedback entering the output digest 
10395  * as input in order to ensure that a different ordered list will result in 
10396  * a different digest. */ 
10397 static void computeDatasetDigest(unsigned char *final
) { 
10398     unsigned char digest
[20]; 
10400     dictIterator 
*di 
= NULL
; 
10405     memset(final
,0,20); /* Start with a clean result */ 
10407     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
10408         redisDb 
*db 
= server
.db
+j
; 
10410         if (dictSize(db
->dict
) == 0) continue; 
10411         di 
= dictGetIterator(db
->dict
); 
10413         /* hash the DB id, so the same dataset moved in a different 
10414          * DB will lead to a different digest */ 
10416         mixDigest(final
,&aux
,sizeof(aux
)); 
10418         /* Iterate this DB writing every entry */ 
10419         while((de 
= dictNext(di
)) != NULL
) { 
10423             memset(digest
,0,20); /* This key-val digest */ 
10424             key 
= dictGetEntryKey(de
); 
10425             mixObjectDigest(digest
,key
); 
10426             if (!server
.vm_enabled 
|| key
->storage 
== REDIS_VM_MEMORY 
|| 
10427                 key
->storage 
== REDIS_VM_SWAPPING
) { 
10428                 o 
= dictGetEntryVal(de
); 
10431                 o 
= vmPreviewObject(key
); 
10433             aux 
= htonl(o
->type
); 
10434             mixDigest(digest
,&aux
,sizeof(aux
)); 
10435             expiretime 
= getExpire(db
,key
); 
10437             /* Save the key and associated value */ 
10438             if (o
->type 
== REDIS_STRING
) { 
10439                 mixObjectDigest(digest
,o
); 
10440             } else if (o
->type 
== REDIS_LIST
) { 
10441                 list 
*list 
= o
->ptr
; 
10445                 listRewind(list
,&li
); 
10446                 while((ln 
= listNext(&li
))) { 
10447                     robj 
*eleobj 
= listNodeValue(ln
); 
10449                     mixObjectDigest(digest
,eleobj
); 
10451             } else if (o
->type 
== REDIS_SET
) { 
10452                 dict 
*set 
= o
->ptr
; 
10453                 dictIterator 
*di 
= dictGetIterator(set
); 
10456                 while((de 
= dictNext(di
)) != NULL
) { 
10457                     robj 
*eleobj 
= dictGetEntryKey(de
); 
10459                     xorObjectDigest(digest
,eleobj
); 
10461                 dictReleaseIterator(di
); 
10462             } else if (o
->type 
== REDIS_ZSET
) { 
10464                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
10467                 while((de 
= dictNext(di
)) != NULL
) { 
10468                     robj 
*eleobj 
= dictGetEntryKey(de
); 
10469                     double *score 
= dictGetEntryVal(de
); 
10470                     unsigned char eledigest
[20]; 
10472                     snprintf(buf
,sizeof(buf
),"%.17g",*score
); 
10473                     memset(eledigest
,0,20); 
10474                     mixObjectDigest(eledigest
,eleobj
); 
10475                     mixDigest(eledigest
,buf
,strlen(buf
)); 
10476                     xorDigest(digest
,eledigest
,20); 
10478                 dictReleaseIterator(di
); 
10479             } else if (o
->type 
== REDIS_HASH
) { 
10483                 hi 
= hashInitIterator(o
); 
10484                 while (hashNext(hi
) != REDIS_ERR
) { 
10485                     unsigned char eledigest
[20]; 
10487                     memset(eledigest
,0,20); 
10488                     obj 
= hashCurrent(hi
,REDIS_HASH_KEY
); 
10489                     mixObjectDigest(eledigest
,obj
); 
10491                     obj 
= hashCurrent(hi
,REDIS_HASH_VALUE
); 
10492                     mixObjectDigest(eledigest
,obj
); 
10494                     xorDigest(digest
,eledigest
,20); 
10496                 hashReleaseIterator(hi
); 
10498                 redisPanic("Unknown object type"); 
10501             /* If the key has an expire, add it to the mix */ 
10502             if (expiretime 
!= -1) xorDigest(digest
,"!!expire!!",10); 
10503             /* We can finally xor the key-val digest to the final digest */ 
10504             xorDigest(final
,digest
,20); 
10506         dictReleaseIterator(di
); 
10510 static void debugCommand(redisClient 
*c
) { 
10511     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
10512         *((char*)-1) = 'x'; 
10513     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
10514         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
10515             addReply(c
,shared
.err
); 
10519         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
10520             addReply(c
,shared
.err
); 
10523         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
10524         addReply(c
,shared
.ok
); 
10525     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
10527         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
10528             addReply(c
,shared
.err
); 
10531         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
10532         addReply(c
,shared
.ok
); 
10533     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
10534         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
10538             addReply(c
,shared
.nokeyerr
); 
10541         key 
= dictGetEntryKey(de
); 
10542         val 
= dictGetEntryVal(de
); 
10543         if (!server
.vm_enabled 
|| (key
->storage 
== REDIS_VM_MEMORY 
|| 
10544                                    key
->storage 
== REDIS_VM_SWAPPING
)) { 
10548             if (val
->encoding 
< (sizeof(strencoding
)/sizeof(char*))) { 
10549                 strenc 
= strencoding
[val
->encoding
]; 
10551                 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
); 
10554             addReplySds(c
,sdscatprintf(sdsempty(), 
10555                 "+Key at:%p refcount:%d, value at:%p refcount:%d " 
10556                 "encoding:%s serializedlength:%lld\r\n", 
10557                 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
, 
10558                 strenc
, (long long) rdbSavedObjectLen(val
,NULL
))); 
10560             addReplySds(c
,sdscatprintf(sdsempty(), 
10561                 "+Key at:%p refcount:%d, value swapped at: page %llu " 
10562                 "using %llu pages\r\n", 
10563                 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
, 
10564                 (unsigned long long) key
->vm
.usedpages
)); 
10566     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc 
== 3) { 
10567         lookupKeyRead(c
->db
,c
->argv
[2]); 
10568         addReply(c
,shared
.ok
); 
10569     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
10570         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]); 
10573         if (!server
.vm_enabled
) { 
10574             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
10578             addReply(c
,shared
.nokeyerr
); 
10581         key 
= dictGetEntryKey(de
); 
10582         val 
= dictGetEntryVal(de
); 
10583         /* If the key is shared we want to create a copy */ 
10584         if (key
->refcount 
> 1) { 
10585             robj 
*newkey 
= dupStringObject(key
); 
10587             key 
= dictGetEntryKey(de
) = newkey
; 
10590         if (key
->storage 
!= REDIS_VM_MEMORY
) { 
10591             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
10592         } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) { 
10593             dictGetEntryVal(de
) = NULL
; 
10594             addReply(c
,shared
.ok
); 
10596             addReply(c
,shared
.err
); 
10598     } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc 
== 3) { 
10603         if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
) 
10605         for (j 
= 0; j 
< keys
; j
++) { 
10606             snprintf(buf
,sizeof(buf
),"key:%lu",j
); 
10607             key 
= createStringObject(buf
,strlen(buf
)); 
10608             if (lookupKeyRead(c
->db
,key
) != NULL
) { 
10612             snprintf(buf
,sizeof(buf
),"value:%lu",j
); 
10613             val 
= createStringObject(buf
,strlen(buf
)); 
10614             dictAdd(c
->db
->dict
,key
,val
); 
10616         addReply(c
,shared
.ok
); 
10617     } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc 
== 2) { 
10618         unsigned char digest
[20]; 
10619         sds d 
= sdsnew("+"); 
10622         computeDatasetDigest(digest
); 
10623         for (j 
= 0; j 
< 20; j
++) 
10624             d 
= sdscatprintf(d
, "%02x",digest
[j
]); 
10626         d 
= sdscatlen(d
,"\r\n",2); 
10629         addReplySds(c
,sdsnew( 
10630             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n")); 
10634 static void _redisAssert(char *estr
, char *file
, int line
) { 
10635     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
10636     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
); 
10637 #ifdef HAVE_BACKTRACE 
10638     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
10639     *((char*)-1) = 'x'; 
10643 static void _redisPanic(char *msg
, char *file
, int line
) { 
10644     redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue"); 
10645     redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
); 
10646 #ifdef HAVE_BACKTRACE 
10647     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
10648     *((char*)-1) = 'x'; 
10652 /* =================================== Main! ================================ */ 
/* Read /proc/sys/vm/overcommit_memory and return its first line converted
 * with atoi(). Returns -1 when the file cannot be opened or read. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char line[64];
    int value = -1;

    if (fp == NULL) return -1;
    if (fgets(line,64,fp) != NULL) value = atoi(line);
    fclose(fp);

    return value;
}
10669 void linuxOvercommitMemoryWarning(void) { 
10670     if (linuxOvercommitMemoryValue() == 0) { 
10671         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
10674 #endif /* __linux__ */ 
10676 static void daemonize(void) { 
10680     if (fork() != 0) exit(0); /* parent exits */ 
10681     setsid(); /* create a new session */ 
10683     /* Every output goes to /dev/null. If Redis is daemonized but 
10684      * the 'logfile' is set to 'stdout' in the configuration file 
10685      * it will not log at all. */ 
10686     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
10687         dup2(fd
, STDIN_FILENO
); 
10688         dup2(fd
, STDOUT_FILENO
); 
10689         dup2(fd
, STDERR_FILENO
); 
10690         if (fd 
> STDERR_FILENO
) close(fd
); 
10692     /* Try to write the pid file */ 
10693     fp 
= fopen(server
.pidfile
,"w"); 
10695         fprintf(fp
,"%d\n",getpid()); 
10700 static void version() { 
10701     printf("Redis server version %s\n", REDIS_VERSION
); 
/* Print the command line synopsis on standard error and exit with
 * status 1. */
static void usage() {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr,"       ./redis-server - (read config from stdin)\n");
    exit(1);
}
10711 int main(int argc
, char **argv
) { 
10714     initServerConfig(); 
10716         if (strcmp(argv
[1], "-v") == 0 || 
10717             strcmp(argv
[1], "--version") == 0) version(); 
10718         if (strcmp(argv
[1], "--help") == 0) usage(); 
10719         resetServerSaveParams(); 
10720         loadServerConfig(argv
[1]); 
10721     } else if ((argc 
> 2)) { 
10724         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
10726     if (server
.daemonize
) daemonize(); 
10728     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
10730     linuxOvercommitMemoryWarning(); 
10732     start 
= time(NULL
); 
10733     if (server
.appendonly
) { 
10734         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
10735             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
10737         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
10738             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
10740     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
10741     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
10743     aeDeleteEventLoop(server
.el
); 
10747 /* ============================= Backtrace support ========================= */ 
10749 #ifdef HAVE_BACKTRACE 
10750 static char *findFuncName(void *pointer
, unsigned long *offset
); 
10752 static void *getMcontextEip(ucontext_t 
*uc
) { 
10753 #if defined(__FreeBSD__) 
10754     return (void*) uc
->uc_mcontext
.mc_eip
; 
10755 #elif defined(__dietlibc__) 
10756     return (void*) uc
->uc_mcontext
.eip
; 
10757 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
10759     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
10761     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
10763 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
10764   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
10765     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
10767     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
10769 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
10770     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
10771 #elif defined(__ia64__) /* Linux IA64 */ 
10772     return (void*) uc
->uc_mcontext
.sc_ip
; 
10778 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
10780     char **messages 
= NULL
; 
10781     int i
, trace_size 
= 0; 
10782     unsigned long offset
=0; 
10783     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
10785     REDIS_NOTUSED(info
); 
10787     redisLog(REDIS_WARNING
, 
10788         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
10789     infostring 
= genRedisInfoString(); 
10790     redisLog(REDIS_WARNING
, "%s",infostring
); 
10791     /* It's not safe to sdsfree() the returned string under memory 
10792      * corruption conditions. Let it leak as we are going to abort */ 
10794     trace_size 
= backtrace(trace
, 100); 
10795     /* overwrite sigaction with caller's address */ 
10796     if (getMcontextEip(uc
) != NULL
) { 
10797         trace
[1] = getMcontextEip(uc
); 
10799     messages 
= backtrace_symbols(trace
, trace_size
); 
10801     for (i
=1; i
<trace_size
; ++i
) { 
10802         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
10804         p 
= strchr(messages
[i
],'+'); 
10805         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
10806             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
10808             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
10811     /* free(messages); Don't call free() with possibly corrupted memory. */ 
10815 static void setupSigSegvAction(void) { 
10816     struct sigaction act
; 
10818     sigemptyset (&act
.sa_mask
); 
10819     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
10820      * is used. Otherwise, sa_handler is used */ 
10821     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
10822     act
.sa_sigaction 
= segvHandler
; 
10823     sigaction (SIGSEGV
, &act
, NULL
); 
10824     sigaction (SIGBUS
, &act
, NULL
); 
10825     sigaction (SIGFPE
, &act
, NULL
); 
10826     sigaction (SIGILL
, &act
, NULL
); 
10827     sigaction (SIGBUS
, &act
, NULL
); 
10831 #include "staticsymbols.h" 
10832 /* This function try to convert a pointer into a function name. It's used in 
10833  * oreder to provide a backtrace under segmentation fault that's able to 
10834  * display functions declared as static (otherwise the backtrace is useless). */ 
10835 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
10837     unsigned long off
, minoff 
= 0; 
10839     /* Try to match against the Symbol with the smallest offset */ 
10840     for (i
=0; symsTable
[i
].pointer
; i
++) { 
10841         unsigned long lp 
= (unsigned long) pointer
; 
10843         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
10844             off
=lp
-symsTable
[i
].pointer
; 
10845             if (ret 
< 0 || off 
< minoff
) { 
10851     if (ret 
== -1) return NULL
; 
10853     return symsTable
[ret
].name
; 
10855 #else /* HAVE_BACKTRACE */ 
/* Build without HAVE_BACKTRACE: no signal handler is installed. */
static void setupSigSegvAction(void) {
}
10858 #endif /* HAVE_BACKTRACE */