2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "2.1.1" 
  45 #endif /* HAVE_BACKTRACE */ 
  53 #include <arpa/inet.h> 
  57 #include <sys/resource.h> 
  65 #include "solarisfixes.h" 
  69 #include "ae.h"     /* Event driven programming library */ 
  70 #include "sds.h"    /* Dynamic safe strings */ 
  71 #include "anet.h"   /* Networking the easy way */ 
  72 #include "dict.h"   /* Hash tables */ 
  73 #include "adlist.h" /* Linked lists */ 
  74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  75 #include "lzf.h"    /* LZF compression library */ 
  76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  77 #include "zipmap.h" /* Compact dictionary-alike data structure */ 
  78 #include "ziplist.h" /* Compact list data structure */ 
  79 #include "sha1.h"   /* SHA1 is used for DEBUG DIGEST */ 
  80 #include "release.h" /* Release and/or git repository information */ 
  86 /* Static server configuration */ 
  87 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  88 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  89 #define REDIS_IOBUF_LEN         1024 
  90 #define REDIS_LOADBUF_LEN       1024 
  91 #define REDIS_STATIC_ARGS       8 
  92 #define REDIS_DEFAULT_DBNUM     16 
  93 #define REDIS_CONFIGLINE_MAX    1024 
  94 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  95 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  96 #define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* lookup 10 expires per loop */ 
  97 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  98 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
 100 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
 101 #define REDIS_WRITEV_THRESHOLD      3 
 102 /* Max number of iovecs used for each writev call */ 
 103 #define REDIS_WRITEV_IOVEC_COUNT    256 
 105 /* Hash table parameters */ 
 106 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 109 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 110 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 111 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 112    this flag will return an error when the 'maxmemory' option is set in the 
 113    config file and the server is using more than maxmemory bytes of memory. 
 114    In short these commands are denied on low memory conditions. */ 
 115 #define REDIS_CMD_DENYOOM       4 
 116 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ 
 119 #define REDIS_STRING 0 
 124 #define REDIS_VMPOINTER 8 
 126 /* Objects encoding. Some kinds of objects, like Strings and Hashes, can be 
 127  * internally represented in multiple ways. The 'encoding' field of the object 
 128  * is set to one of these values for this object. 
      * The numeric values line up with the order of the names listed in the 
      * strencoding[] table that follows these defines. */ 
 129 #define REDIS_ENCODING_RAW 0     /* Raw representation */ 
 130 #define REDIS_ENCODING_INT 1     /* Encoded as integer */ 
 131 #define REDIS_ENCODING_HT 2      /* Encoded as hash table */ 
 132 #define REDIS_ENCODING_ZIPMAP 3  /* Encoded as zipmap */ 
 133 #define REDIS_ENCODING_LIST 4    /* Encoded as list ("list" in strencoding; presumably a regular linked list - TODO confirm) */ 
 134 #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ 
 136 static char* strencoding
[] = { 
 137     "raw", "int", "hashtable", "zipmap", "list", "ziplist" 
 140 /* Object types only used for dumping to disk */ 
 141 #define REDIS_EXPIRETIME 253 
 142 #define REDIS_SELECTDB 254 
 143 #define REDIS_EOF 255 
 145 /* Defines related to the dump file format. To store 32 bits lengths for short 
 146  * keys requires a lot of space, so we check the most significant 2 bits of 
 147  * the first byte to interpret the length: 
 149  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 150  * 01|000000 00000000 =>  01, the len is 14 bits, 6 bits + 8 bits of next byte 
 151  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 152  * 11|000000 this means: specially encoded object will follow. The six bits 
 153  *           number specify the kind of object that follows. 
 154  *           See the REDIS_RDB_ENC_* defines. 
 156  * Lengths up to 63 are stored using a single byte, most DB keys, and many 
 157  * values, will fit inside. */ 
 158 #define REDIS_RDB_6BITLEN 0 
 159 #define REDIS_RDB_14BITLEN 1 
 160 #define REDIS_RDB_32BITLEN 2 
 161 #define REDIS_RDB_ENCVAL 3 
 162 #define REDIS_RDB_LENERR UINT_MAX 
 164 /* When a length of a string object stored on disk has the first two bits 
 165  * set, the remaining six bits specify a special encoding for the object 
 166  * according to the following defines: */ 
 167 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 168 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 169 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 170 #define REDIS_RDB_ENC_LZF 3         /* string compressed with FASTLZ */ 
 172 /* Virtual memory object->where field. */ 
 173 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 174 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 175 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 176 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 178 /* Virtual memory static configuration stuff. 
 179  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 180 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 181 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 182 #define REDIS_VM_MAX_THREADS 32 
 183 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 184 /* The following is the *percentage* of completed I/O jobs to process when the 
 185  * handler is called. While Virtual Memory I/O operations are performed by 
 186  * threads, these operations must be processed by the main thread when completed 
 187  * in order to take effect. */ 
 188 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 191 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 192 #define REDIS_MASTER 2      /* This client is a master server */ 
 193 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 194 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 195 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 196 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 197 #define REDIS_DIRTY_CAS 64  /* Watched keys modified. EXEC will fail. */ 
 199 /* Slave replication state - slave side */ 
 200 #define REDIS_REPL_NONE 0   /* No active replication */ 
 201 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 202 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 204 /* Slave replication state - from the point of view of master 
 205  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 206  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 207  * to start the next background saving in order to send updates to it. */ 
 208 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 209 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 210 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 211 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 213 /* List related stuff */ 
 217 /* Sort operations */ 
 218 #define REDIS_SORT_GET 0 
 219 #define REDIS_SORT_ASC 1 
 220 #define REDIS_SORT_DESC 2 
 221 #define REDIS_SORTKEY_MAX 1024 
 224 #define REDIS_DEBUG 0 
 225 #define REDIS_VERBOSE 1 
 226 #define REDIS_NOTICE 2 
 227 #define REDIS_WARNING 3 
 229 /* Anti-warning macro... */ 
 230 #define REDIS_NOTUSED(V) ((void) V) 
 232 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 233 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 235 /* Append only defines */ 
 236 #define APPENDFSYNC_NO 0 
 237 #define APPENDFSYNC_ALWAYS 1 
 238 #define APPENDFSYNC_EVERYSEC 2 
 240 /* Zip structure related defaults */ 
 241 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 242 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 243 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024 
 244 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32 
 246 /* We can print the stacktrace, so our assert is defined this way: */ 
 247 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
 248 #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) 
 249 static void _redisAssert(char *estr
, char *file
, int line
); 
 250 static void _redisPanic(char *msg
, char *file
, int line
); 
 252 /*================================= Data types ============================== */ 
 254 /* A redis object, that is a type able to hold a string / list / set */ 
 256 /* The actual Redis Object */ 
 257 typedef struct redisObject 
{ 
 259     unsigned storage
:2;     /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ 
 261     unsigned lru
:22;        /* lru time (relative to server.lruclock) */ 
 264     /* VM fields are only allocated if VM is active, otherwise the 
 265      * object allocation function will just allocate 
 266      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 267      * Redis without VM active will not have any overhead. */ 
 270 /* The VM pointer structure - identifies an object in the swap file. 
 272  * This object is stored in place of the value 
 273  * object in the main key->value hash table representing a database. 
 274  * Note that the first fields (type, storage) are the same as the redisObject 
 275  * structure so that vmPointer structures can be accessed even when cast 
 276  * as redisObject structures. 
 278  * This is useful as we don't know whether a value object is on disk or not, but we 
 279  * are always able to read obj->storage to check this. For vmPointer 
 280  * structures "type" is set to REDIS_VMPOINTER (even though without this field 
 281  * it is still possible to check the kind of object from the value of 'storage').*/ 
 282 typedef struct vmPointer 
{ 
 284     unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
 286     unsigned int vtype
; /* type of the object stored in the swap file */ 
 287     off_t page
;         /* the page at witch the object is stored on disk */ 
 288     off_t usedpages
;    /* number of pages used on disk */ 
 291 /* Macro used to initialize a Redis object allocated on the stack. 
 292  * Note that this macro is kept near the structure definition to make sure 
 293  * we'll update it when the structure is changed, to avoid bugs like 
 294  * bug #85 introduced exactly in this way. */ 
 295 #define initStaticStringObject(_var,_ptr) do { \ 
 297     _var.type = REDIS_STRING; \ 
 298     _var.encoding = REDIS_ENCODING_RAW; \ 
 300     _var.storage = REDIS_VM_MEMORY; \ 
 303 typedef struct redisDb 
{ 
 304     dict 
*dict
;                 /* The keyspace for this DB */ 
 305     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 306     dict 
*blocking_keys
;        /* Keys with clients waiting for data (BLPOP) */ 
 307     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 308     dict 
*watched_keys
;         /* WATCHED keys for MULTI/EXEC CAS */ 
 312 /* Client MULTI/EXEC state */ 
 313 typedef struct multiCmd 
{ 
 316     struct redisCommand 
*cmd
; 
 319 typedef struct multiState 
{ 
 320     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 321     int count
;              /* Total number of MULTI commands */ 
 324 /* With multiplexing we need to keep per-client state. 
 325  * Clients are kept in a linked list. */ 
 326 typedef struct redisClient 
{ 
 331     robj 
**argv
, **mbargv
; 
 333     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 334     int multibulk
;          /* multi bulk command format active */ 
 337     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 338     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 339     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 340     int authenticated
;      /* when requirepass is non-NULL */ 
 341     int replstate
;          /* replication state if this is a slave */ 
 342     int repldbfd
;           /* replication DB file descriptor */ 
 343     long repldboff
;         /* replication DB file offset */ 
 344     off_t repldbsize
;       /* replication DB file size */ 
 345     multiState mstate
;      /* MULTI/EXEC state */ 
 346     robj 
**blocking_keys
;   /* The key we are waiting to terminate a blocking 
 347                              * operation such as BLPOP. Otherwise NULL. */ 
 348     int blocking_keys_num
;  /* Number of blocking keys */ 
 349     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 350                              * is >= blockingto then the operation timed out. */ 
 351     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 352                              * swap file in order to continue. */ 
 353     list 
*watched_keys
;     /* Keys WATCHED for MULTI/EXEC CAS */ 
 354     dict 
*pubsub_channels
;  /* channels a client is interested in (SUBSCRIBE) */ 
 355     list 
*pubsub_patterns
;  /* patterns a client is interested in (SUBSCRIBE) */ 
 363 /* Global server state structure */ 
 368     long long dirty
;            /* changes to DB from the last save */ 
 370     list 
*slaves
, *monitors
; 
 371     char neterr
[ANET_ERR_LEN
]; 
 373     int cronloops
;              /* number of times the cron function run */ 
 374     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 375     time_t lastsave
;            /* Unix time of last save succeeede */ 
 376     /* Fields used only for stats */ 
 377     time_t stat_starttime
;         /* server start time */ 
 378     long long stat_numcommands
;    /* number of processed commands */ 
 379     long long stat_numconnections
; /* number of connections received */ 
 380     long long stat_expiredkeys
;   /* number of expired keys */ 
 389     int no_appendfsync_on_rewrite
; 
 395     pid_t bgsavechildpid
; 
 396     pid_t bgrewritechildpid
; 
 397     sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */ 
 398     sds aofbuf
;       /* AOF buffer, written before entering the event loop */ 
 399     struct saveparam 
*saveparams
; 
 404     char *appendfilename
; 
 408     /* Replication related */ 
 413     redisClient 
*master
;    /* client that is master for this slave */ 
 415     unsigned int maxclients
; 
 416     unsigned long long maxmemory
; 
 417     unsigned int blpop_blocked_clients
; 
 418     unsigned int vm_blocked_clients
; 
 419     /* Sort parameters - qsort_r() is only available under BSD so we 
 420      * have to take this state global, in order to pass it to sortCompare() */ 
 424     /* Virtual memory configuration */ 
 429     unsigned long long vm_max_memory
; 
 430     /* Zip structure config */ 
 431     size_t hash_max_zipmap_entries
; 
 432     size_t hash_max_zipmap_value
; 
 433     size_t list_max_ziplist_entries
; 
 434     size_t list_max_ziplist_value
; 
 435     /* Virtual memory state */ 
 438     off_t vm_next_page
; /* Next probably empty page */ 
 439     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 440     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 441     time_t unixtime
;    /* Unix time sampled every second. */ 
 442     /* Virtual memory I/O threads stuff */ 
 443     /* An I/O thread process an element taken from the io_jobs queue and 
 444      * put the result of the operation in the io_done list. While the 
 445      * job is being processed, it's put on io_processing queue. */ 
 446     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 447     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 448     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 449     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 450     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 451     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 452     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 453     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 454     int io_active_threads
; /* Number of running I/O threads */ 
 455     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 456     /* Our main thread is blocked on the event loop, looking for sockets ready 
 457      * to be read or written, so when a threaded I/O operation is ready to be 
 458      * processed by the main thread, the I/O thread will use a unix pipe to 
 459      * awake the main thread. The following are the two pipe FDs. */ 
 460     int io_ready_pipe_read
; 
 461     int io_ready_pipe_write
; 
 462     /* Virtual memory stats */ 
 463     unsigned long long vm_stats_used_pages
; 
 464     unsigned long long vm_stats_swapped_objects
; 
 465     unsigned long long vm_stats_swapouts
; 
 466     unsigned long long vm_stats_swapins
; 
 468     dict 
*pubsub_channels
; /* Map channels to list of subscribed clients */ 
 469     list 
*pubsub_patterns
; /* A list of pubsub_patterns */ 
 472     unsigned lruclock
:22;        /* clock incrementing every minute, for LRU */ 
 473     unsigned lruclock_padding
:10; 
 476 typedef struct pubsubPattern 
{ 
 481 typedef void redisCommandProc(redisClient 
*c
); 
 482 typedef void redisVmPreloadProc(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 483 struct redisCommand 
{ 
 485     redisCommandProc 
*proc
; 
 488     /* Use a function to determine which keys need to be loaded 
 489      * in the background prior to executing this command. Takes precedence 
 490      * over vm_firstkey and others, ignored when NULL */ 
 491     redisVmPreloadProc 
*vm_preload_proc
; 
 492     /* What keys should be loaded in background when calling this command? */ 
 493     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 494     int vm_lastkey
;  /* THe last argument that's a key */ 
 495     int vm_keystep
;  /* The step between first and last key */ 
 498 struct redisFunctionSym 
{ 
 500     unsigned long pointer
; 
 503 typedef struct _redisSortObject 
{ 
 511 typedef struct _redisSortOperation 
{ 
 514 } redisSortOperation
; 
 516 /* ZSETs use a specialized version of Skiplists */ 
 518 typedef struct zskiplistNode 
{ 
 519     struct zskiplistNode 
**forward
; 
 520     struct zskiplistNode 
*backward
; 
 526 typedef struct zskiplist 
{ 
 527     struct zskiplistNode 
*header
, *tail
; 
 528     unsigned long length
; 
 532 typedef struct zset 
{ 
 537 /* Our shared "common" objects */ 
 539 #define REDIS_SHARED_INTEGERS 10000 
 540 struct sharedObjectsStruct 
{ 
 541     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 542     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 543     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 544     *outofrangeerr
, *plus
, 
 545     *select0
, *select1
, *select2
, *select3
, *select4
, 
 546     *select5
, *select6
, *select7
, *select8
, *select9
, 
 547     *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
, 
 548     *mbulk4
, *psubscribebulk
, *punsubscribebulk
, 
 549     *integers
[REDIS_SHARED_INTEGERS
]; 
 552 /* Global vars that are actually used as constants. The following double 
 553  * values are used for double on-disk serialization, and are initialized 
 554  * at runtime to avoid strange compiler optimizations. */ 
 556 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 558 /* VM threaded I/O request message */ 
 559 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 560 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 561 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 562 typedef struct iojob 
{ 
 563     int type
;   /* Request type, REDIS_IOJOB_* */ 
 564     redisDb 
*db
;/* Redis database */ 
 565     robj 
*key
;  /* This I/O request is about swapping this key */ 
 566     robj 
*id
;   /* Unique identifier of this job: 
 567                    this is the object to swap for REDIS_IOREQ_*_SWAP, or the 
 568                    vmpointer objct for REDIS_IOREQ_LOAD. */ 
 569     robj 
*val
;  /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this 
 570                  * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */ 
 571     off_t page
; /* Swap page where to read/write the object */ 
 572     off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */ 
 573     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 574     pthread_t thread
; /* ID of the thread processing this entry */ 
 577 /*================================ Prototypes =============================== */ 
 579 static void freeStringObject(robj 
*o
); 
 580 static void freeListObject(robj 
*o
); 
 581 static void freeSetObject(robj 
*o
); 
 582 static void decrRefCount(void *o
); 
 583 static robj 
*createObject(int type
, void *ptr
); 
 584 static void freeClient(redisClient 
*c
); 
 585 static int rdbLoad(char *filename
); 
 586 static void addReply(redisClient 
*c
, robj 
*obj
); 
 587 static void addReplySds(redisClient 
*c
, sds s
); 
 588 static void incrRefCount(robj 
*o
); 
 589 static int rdbSaveBackground(char *filename
); 
 590 static robj 
*createStringObject(char *ptr
, size_t len
); 
 591 static robj 
*dupStringObject(robj 
*o
); 
 592 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
); 
 593 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
); 
 594 static void flushAppendOnlyFile(void); 
 595 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 596 static int syncWithMaster(void); 
 597 static robj 
*tryObjectEncoding(robj 
*o
); 
 598 static robj 
*getDecodedObject(robj 
*o
); 
 599 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 600 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 601 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 602 static int dbDelete(redisDb 
*db
, robj 
*key
); 
 603 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 604 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 605 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 606 static void freeMemoryIfNeeded(void); 
 607 static int processCommand(redisClient 
*c
); 
 608 static void setupSigSegvAction(void); 
 609 static void rdbRemoveTempFile(pid_t childpid
); 
 610 static void aofRemoveTempFile(pid_t childpid
); 
 611 static size_t stringObjectLen(robj 
*o
); 
 612 static void processInputBuffer(redisClient 
*c
); 
 613 static zskiplist 
*zslCreate(void); 
 614 static void zslFree(zskiplist 
*zsl
); 
 615 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 616 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 617 static void initClientMultiState(redisClient 
*c
); 
 618 static void freeClientMultiState(redisClient 
*c
); 
 619 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 620 static void unblockClientWaitingData(redisClient 
*c
); 
 621 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 622 static void vmInit(void); 
 623 static void vmMarkPagesFree(off_t page
, off_t count
); 
 624 static robj 
*vmLoadObject(robj 
*o
); 
 625 static robj 
*vmPreviewObject(robj 
*o
); 
 626 static int vmSwapOneObjectBlocking(void); 
 627 static int vmSwapOneObjectThreaded(void); 
 628 static int vmCanSwapOut(void); 
 629 static int tryFreeOneObjectFromFreelist(void); 
 630 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 631 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 632 static void vmCancelThreadedIOJob(robj 
*o
); 
 633 static void lockThreadedIO(void); 
 634 static void unlockThreadedIO(void); 
 635 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 636 static void freeIOJob(iojob 
*j
); 
 637 static void queueIOJob(iojob 
*j
); 
 638 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 639 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 640 static void waitEmptyIOJobsQueue(void); 
 641 static void vmReopenSwapFile(void); 
 642 static int vmFreePage(off_t page
); 
 643 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 644 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 645 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
); 
 646 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 647 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 648 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 649 static struct redisCommand 
*lookupCommand(char *name
); 
 650 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 651 static void resetClient(redisClient 
*c
); 
 652 static void convertToRealHash(robj 
*o
); 
 653 static void listTypeConvert(robj 
*o
, int enc
); 
 654 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
); 
 655 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
); 
 656 static void freePubsubPattern(void *p
); 
 657 static int listMatchPubsubPattern(void *a
, void *b
); 
 658 static int compareStringObjects(robj 
*a
, robj 
*b
); 
 659 static int equalStringObjects(robj 
*a
, robj 
*b
); 
 661 static int rewriteAppendOnlyFileBackground(void); 
 662 static vmpointer 
*vmSwapObjectBlocking(robj 
*val
); 
 663 static int prepareForShutdown(); 
 664 static void touchWatchedKey(redisDb 
*db
, robj 
*key
); 
 665 static void touchWatchedKeysOnFlush(int dbid
); 
 666 static void unwatchAllKeys(redisClient 
*c
); 
 668 static void authCommand(redisClient 
*c
); 
 669 static void pingCommand(redisClient 
*c
); 
 670 static void echoCommand(redisClient 
*c
); 
 671 static void setCommand(redisClient 
*c
); 
 672 static void setnxCommand(redisClient 
*c
); 
 673 static void setexCommand(redisClient 
*c
); 
 674 static void getCommand(redisClient 
*c
); 
 675 static void delCommand(redisClient 
*c
); 
 676 static void existsCommand(redisClient 
*c
); 
 677 static void incrCommand(redisClient 
*c
); 
 678 static void decrCommand(redisClient 
*c
); 
 679 static void incrbyCommand(redisClient 
*c
); 
 680 static void decrbyCommand(redisClient 
*c
); 
 681 static void selectCommand(redisClient 
*c
); 
 682 static void randomkeyCommand(redisClient 
*c
); 
 683 static void keysCommand(redisClient 
*c
); 
 684 static void dbsizeCommand(redisClient 
*c
); 
 685 static void lastsaveCommand(redisClient 
*c
); 
 686 static void saveCommand(redisClient 
*c
); 
 687 static void bgsaveCommand(redisClient 
*c
); 
 688 static void bgrewriteaofCommand(redisClient 
*c
); 
 689 static void shutdownCommand(redisClient 
*c
); 
 690 static void moveCommand(redisClient 
*c
); 
 691 static void renameCommand(redisClient 
*c
); 
 692 static void renamenxCommand(redisClient 
*c
); 
 693 static void lpushCommand(redisClient 
*c
); 
 694 static void rpushCommand(redisClient 
*c
); 
 695 static void lpushxCommand(redisClient 
*c
); 
 696 static void rpushxCommand(redisClient 
*c
); 
 697 static void linsertCommand(redisClient 
*c
); 
 698 static void lpopCommand(redisClient 
*c
); 
 699 static void rpopCommand(redisClient 
*c
); 
 700 static void llenCommand(redisClient 
*c
); 
 701 static void lindexCommand(redisClient 
*c
); 
 702 static void lrangeCommand(redisClient 
*c
); 
 703 static void ltrimCommand(redisClient 
*c
); 
 704 static void typeCommand(redisClient 
*c
); 
 705 static void lsetCommand(redisClient 
*c
); 
 706 static void saddCommand(redisClient 
*c
); 
 707 static void sremCommand(redisClient 
*c
); 
 708 static void smoveCommand(redisClient 
*c
); 
 709 static void sismemberCommand(redisClient 
*c
); 
 710 static void scardCommand(redisClient 
*c
); 
 711 static void spopCommand(redisClient 
*c
); 
 712 static void srandmemberCommand(redisClient 
*c
); 
 713 static void sinterCommand(redisClient 
*c
); 
 714 static void sinterstoreCommand(redisClient 
*c
); 
 715 static void sunionCommand(redisClient 
*c
); 
 716 static void sunionstoreCommand(redisClient 
*c
); 
 717 static void sdiffCommand(redisClient 
*c
); 
 718 static void sdiffstoreCommand(redisClient 
*c
); 
 719 static void syncCommand(redisClient 
*c
); 
 720 static void flushdbCommand(redisClient 
*c
); 
 721 static void flushallCommand(redisClient 
*c
); 
 722 static void sortCommand(redisClient 
*c
); 
 723 static void lremCommand(redisClient 
*c
); 
 724 static void rpoplpushcommand(redisClient 
*c
); 
 725 static void infoCommand(redisClient 
*c
); 
 726 static void mgetCommand(redisClient 
*c
); 
 727 static void monitorCommand(redisClient 
*c
); 
 728 static void expireCommand(redisClient 
*c
); 
 729 static void expireatCommand(redisClient 
*c
); 
 730 static void getsetCommand(redisClient 
*c
); 
 731 static void ttlCommand(redisClient 
*c
); 
 732 static void slaveofCommand(redisClient 
*c
); 
 733 static void debugCommand(redisClient 
*c
); 
 734 static void msetCommand(redisClient 
*c
); 
 735 static void msetnxCommand(redisClient 
*c
); 
 736 static void zaddCommand(redisClient 
*c
); 
 737 static void zincrbyCommand(redisClient 
*c
); 
 738 static void zrangeCommand(redisClient 
*c
); 
 739 static void zrangebyscoreCommand(redisClient 
*c
); 
 740 static void zcountCommand(redisClient 
*c
); 
 741 static void zrevrangeCommand(redisClient 
*c
); 
 742 static void zcardCommand(redisClient 
*c
); 
 743 static void zremCommand(redisClient 
*c
); 
 744 static void zscoreCommand(redisClient 
*c
); 
 745 static void zremrangebyscoreCommand(redisClient 
*c
); 
 746 static void multiCommand(redisClient 
*c
); 
 747 static void execCommand(redisClient 
*c
); 
 748 static void discardCommand(redisClient 
*c
); 
 749 static void blpopCommand(redisClient 
*c
); 
 750 static void brpopCommand(redisClient 
*c
); 
 751 static void appendCommand(redisClient 
*c
); 
 752 static void substrCommand(redisClient 
*c
); 
 753 static void zrankCommand(redisClient 
*c
); 
 754 static void zrevrankCommand(redisClient 
*c
); 
 755 static void hsetCommand(redisClient 
*c
); 
 756 static void hsetnxCommand(redisClient 
*c
); 
 757 static void hgetCommand(redisClient 
*c
); 
 758 static void hmsetCommand(redisClient 
*c
); 
 759 static void hmgetCommand(redisClient 
*c
); 
 760 static void hdelCommand(redisClient 
*c
); 
 761 static void hlenCommand(redisClient 
*c
); 
 762 static void zremrangebyrankCommand(redisClient 
*c
); 
 763 static void zunionstoreCommand(redisClient 
*c
); 
 764 static void zinterstoreCommand(redisClient 
*c
); 
 765 static void hkeysCommand(redisClient 
*c
); 
 766 static void hvalsCommand(redisClient 
*c
); 
 767 static void hgetallCommand(redisClient 
*c
); 
 768 static void hexistsCommand(redisClient 
*c
); 
 769 static void configCommand(redisClient 
*c
); 
 770 static void hincrbyCommand(redisClient 
*c
); 
 771 static void subscribeCommand(redisClient 
*c
); 
 772 static void unsubscribeCommand(redisClient 
*c
); 
 773 static void psubscribeCommand(redisClient 
*c
); 
 774 static void punsubscribeCommand(redisClient 
*c
); 
 775 static void publishCommand(redisClient 
*c
); 
 776 static void watchCommand(redisClient 
*c
); 
 777 static void unwatchCommand(redisClient 
*c
); 
 779 /*================================= Globals ================================= */ 
 782 static struct redisServer server
; /* server global state */ 
 783 static struct redisCommand 
*commandTable
; 
 784 static struct redisCommand readonlyCommandTable
[] = { 
 785     {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 786     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 787     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 788     {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 789     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 790     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 791     {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 792     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 793     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 794     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 795     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1}, 
 796     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 797     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 798     {"rpushx",rpushxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 799     {"lpushx",lpushxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 800     {"linsert",linsertCommand
,5,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 801     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 802     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 803     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 804     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 805     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 806     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 807     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 808     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 809     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 810     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 811     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1}, 
 812     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 813     {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 814     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1}, 
 815     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 816     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 817     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 818     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 819     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 820     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 821     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 822     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 823     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 824     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 825     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 826     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 827     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 828     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 829     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 830     {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 831     {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 832     {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 833     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 834     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 835     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 836     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 837     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 838     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 839     {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 840     {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 841     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 842     {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 843     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 844     {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 845     {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 846     {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 847     {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 848     {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 849     {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 850     {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 851     {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 852     {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 853     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 854     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 855     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 856     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 857     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 858     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 859     {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 860     {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 861     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 862     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 863     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 864     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 865     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 866     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 867     {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 868     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 869     {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 870     {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 871     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 872     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 873     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 874     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 875     {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 876     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 877     {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0}, 
 878     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 879     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 880     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 881     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 882     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 883     {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 884     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 885     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 886     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 887     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 888     {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 889     {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 890     {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 891     {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 892     {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 893     {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0}, 
 894     {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 895     {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0} 
 898 /*============================ Utility functions ============================ */ 
 900 /* Glob-style pattern matching. */ 
 901 static int stringmatchlen(const char *pattern
, int patternLen
, 
 902         const char *string
, int stringLen
, int nocase
) 
 907             while (pattern
[1] == '*') { 
 912                 return 1; /* match */ 
 914                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 915                             string
, stringLen
, nocase
)) 
 916                     return 1; /* match */ 
 920             return 0; /* no match */ 
 924                 return 0; /* no match */ 
 934             not = pattern
[0] == '^'; 
 941                 if (pattern
[0] == '\\') { 
 944                     if (pattern
[0] == string
[0]) 
 946                 } else if (pattern
[0] == ']') { 
 948                 } else if (patternLen 
== 0) { 
 952                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 953                     int start 
= pattern
[0]; 
 954                     int end 
= pattern
[2]; 
 962                         start 
= tolower(start
); 
 968                     if (c 
>= start 
&& c 
<= end
) 
 972                         if (pattern
[0] == string
[0]) 
 975                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 985                 return 0; /* no match */ 
 991             if (patternLen 
>= 2) { 
 998                 if (pattern
[0] != string
[0]) 
 999                     return 0; /* no match */ 
1001                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
1002                     return 0; /* no match */ 
1010         if (stringLen 
== 0) { 
1011             while(*pattern 
== '*') { 
1018     if (patternLen 
== 0 && stringLen 
== 0) 
1023 static int stringmatch(const char *pattern
, const char *string
, int nocase
) { 
1024     return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
); 
1027 /* Convert a string representing an amount of memory into the number of 
1028  * bytes, so for instance memtoll("1Gb") will return 1073741824 that is 
1031  * On parsing error, if *err is not NULL, it's set to 1, otherwise it's 
1033 static long long memtoll(const char *p
, int *err
) { 
1036     long mul
; /* unit multiplier */ 
1038     unsigned int digits
; 
1041     /* Search the first non digit character. */ 
1044     while(*u 
&& isdigit(*u
)) u
++; 
1045     if (*u 
== '\0' || !strcasecmp(u
,"b")) { 
1047     } else if (!strcasecmp(u
,"k")) { 
1049     } else if (!strcasecmp(u
,"kb")) { 
1051     } else if (!strcasecmp(u
,"m")) { 
1053     } else if (!strcasecmp(u
,"mb")) { 
1055     } else if (!strcasecmp(u
,"g")) { 
1056         mul 
= 1000L*1000*1000; 
1057     } else if (!strcasecmp(u
,"gb")) { 
1058         mul 
= 1024L*1024*1024; 
1064     if (digits 
>= sizeof(buf
)) { 
1068     memcpy(buf
,p
,digits
); 
1070     val 
= strtoll(buf
,NULL
,10); 
1074 /* Convert a long long into a string. Returns the number of 
1075  * characters needed to represent the number, that can be shorter if passed 
1076  * buffer length is not enough to store the whole number. */ 
1077 static int ll2string(char *s
, size_t len
, long long value
) { 
1079     unsigned long long v
; 
1082     if (len 
== 0) return 0; 
1083     v 
= (value 
< 0) ? -value 
: value
; 
1084     p 
= buf
+31; /* point to the last character */ 
1089     if (value 
< 0) *p
-- = '-'; 
1092     if (l
+1 > len
) l 
= len
-1; /* Make sure it fits, including the nul term */ 
1098 static void redisLog(int level
, const char *fmt
, ...) { 
1102     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
1106     if (level 
>= server
.verbosity
) { 
1112         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
1113         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
1114         vfprintf(fp
, fmt
, ap
); 
1120     if (server
.logfile
) fclose(fp
); 
1123 /*====================== Hash table type implementation  ==================== */ 
1125 /* This is a hash table type that uses the SDS dynamic strings library as 
1126  * keys and redis objects as values (objects can hold SDS strings, 
1129 static void dictVanillaFree(void *privdata
, void *val
) 
1131     DICT_NOTUSED(privdata
); 
1135 static void dictListDestructor(void *privdata
, void *val
) 
1137     DICT_NOTUSED(privdata
); 
1138     listRelease((list
*)val
); 
1141 static int dictSdsKeyCompare(void *privdata
, const void *key1
, 
1145     DICT_NOTUSED(privdata
); 
1147     l1 
= sdslen((sds
)key1
); 
1148     l2 
= sdslen((sds
)key2
); 
1149     if (l1 
!= l2
) return 0; 
1150     return memcmp(key1
, key2
, l1
) == 0; 
1153 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
1155     DICT_NOTUSED(privdata
); 
1157     if (val 
== NULL
) return; /* Values of swapped out keys as set to NULL */ 
1161 static void dictSdsDestructor(void *privdata
, void *val
) 
1163     DICT_NOTUSED(privdata
); 
1168 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
1171     const robj 
*o1 
= key1
, *o2 
= key2
; 
1172     return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1175 static unsigned int dictObjHash(const void *key
) { 
1176     const robj 
*o 
= key
; 
1177     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1180 static unsigned int dictSdsHash(const void *key
) { 
1181     return dictGenHashFunction((unsigned char*)key
, sdslen((char*)key
)); 
1184 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
1187     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
1190     if (o1
->encoding 
== REDIS_ENCODING_INT 
&& 
1191         o2
->encoding 
== REDIS_ENCODING_INT
) 
1192             return o1
->ptr 
== o2
->ptr
; 
1194     o1 
= getDecodedObject(o1
); 
1195     o2 
= getDecodedObject(o2
); 
1196     cmp 
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1202 static unsigned int dictEncObjHash(const void *key
) { 
1203     robj 
*o 
= (robj
*) key
; 
1205     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
1206         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1208         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1212             len 
= ll2string(buf
,32,(long)o
->ptr
); 
1213             return dictGenHashFunction((unsigned char*)buf
, len
); 
1217             o 
= getDecodedObject(o
); 
1218             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1226 static dictType setDictType 
= { 
1227     dictEncObjHash
,            /* hash function */ 
1230     dictEncObjKeyCompare
,      /* key compare */ 
1231     dictRedisObjectDestructor
, /* key destructor */ 
1232     NULL                       
/* val destructor */ 
1235 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1236 static dictType zsetDictType 
= { 
1237     dictEncObjHash
,            /* hash function */ 
1240     dictEncObjKeyCompare
,      /* key compare */ 
1241     dictRedisObjectDestructor
, /* key destructor */ 
1242     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1245 /* Db->dict, keys are sds strings, vals are Redis objects. */ 
1246 static dictType dbDictType 
= { 
1247     dictSdsHash
,                /* hash function */ 
1250     dictSdsKeyCompare
,          /* key compare */ 
1251     dictSdsDestructor
,          /* key destructor */ 
1252     dictRedisObjectDestructor   
/* val destructor */ 
1256 static dictType keyptrDictType 
= { 
1257     dictSdsHash
,               /* hash function */ 
1260     dictSdsKeyCompare
,         /* key compare */ 
1261     dictSdsDestructor
,         /* key destructor */ 
1262     NULL                       
/* val destructor */ 
1265 /* Hash type hash table (note that small hashes are represented with zipmaps) */ 
1266 static dictType hashDictType 
= { 
1267     dictEncObjHash
,             /* hash function */ 
1270     dictEncObjKeyCompare
,       /* key compare */ 
1271     dictRedisObjectDestructor
,  /* key destructor */ 
1272     dictRedisObjectDestructor   
/* val destructor */ 
1275 /* Keylist hash table type has unencoded redis objects as keys and 
1276  * lists as values. It's used for blocking operations (BLPOP) and to 
1277  * map swapped keys to a list of clients waiting for these keys to be loaded. */ 
1278 static dictType keylistDictType 
= { 
1279     dictObjHash
,                /* hash function */ 
1282     dictObjKeyCompare
,          /* key compare */ 
1283     dictRedisObjectDestructor
,  /* key destructor */ 
1284     dictListDestructor          
/* val destructor */ 
1287 static void version(); 
1289 /* ========================= Random utility functions ======================= */ 
1291 /* Redis generally does not try to recover from out of memory conditions 
1292  * when allocating objects or strings, it is not clear if it will be possible 
1293  * to report this condition to the client since the networking layer itself 
1294  * is based on heap allocation for send buffers, so we simply abort. 
1295  * At least the code will be simpler to read... */ 
1296 static void oom(const char *msg
) { 
1297     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1302 /* ====================== Redis server networking stuff ===================== */ 
1303 static void closeTimedoutClients(void) { 
1306     time_t now 
= time(NULL
); 
1309     listRewind(server
.clients
,&li
); 
1310     while ((ln 
= listNext(&li
)) != NULL
) { 
1311         c 
= listNodeValue(ln
); 
1312         if (server
.maxidletime 
&& 
1313             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1314             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1315             dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */ 
1316             listLength(c
->pubsub_patterns
) == 0 && 
1317             (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1319             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1321         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1322             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1323                 addReply(c
,shared
.nullmultibulk
); 
1324                 unblockClientWaitingData(c
); 
1330 static int htNeedsResize(dict 
*dict
) { 
1331     long long size
, used
; 
1333     size 
= dictSlots(dict
); 
1334     used 
= dictSize(dict
); 
1335     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1336             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1339 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL 
1340  * we resize the hash table to save memory */ 
1341 static void tryResizeHashTables(void) { 
1344     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1345         if (htNeedsResize(server
.db
[j
].dict
)) 
1346             dictResize(server
.db
[j
].dict
); 
1347         if (htNeedsResize(server
.db
[j
].expires
)) 
1348             dictResize(server
.db
[j
].expires
); 
1352 /* Our hash table implementation performs rehashing incrementally while 
1353  * we write/read from the hash table. Still if the server is idle, the hash 
1354  * table will use two tables for a long time. So we try to use 1 millisecond 
1355  * of CPU time at every serverCron() loop in order to rehash some key. */ 
1356 static void incrementallyRehash(void) { 
1359     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1360         if (dictIsRehashing(server
.db
[j
].dict
)) { 
1361             dictRehashMilliseconds(server
.db
[j
].dict
,1); 
1362             break; /* already used our millisecond for this loop... */ 
1367 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
1368 void backgroundSaveDoneHandler(int statloc
) { 
1369     int exitcode 
= WEXITSTATUS(statloc
); 
1370     int bysignal 
= WIFSIGNALED(statloc
); 
1372     if (!bysignal 
&& exitcode 
== 0) { 
1373         redisLog(REDIS_NOTICE
, 
1374             "Background saving terminated with success"); 
1376         server
.lastsave 
= time(NULL
); 
1377     } else if (!bysignal 
&& exitcode 
!= 0) { 
1378         redisLog(REDIS_WARNING
, "Background saving error"); 
1380         redisLog(REDIS_WARNING
, 
1381             "Background saving terminated by signal %d", WTERMSIG(statloc
)); 
1382         rdbRemoveTempFile(server
.bgsavechildpid
); 
1384     server
.bgsavechildpid 
= -1; 
1385     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1386      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1387     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1390 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1392 void backgroundRewriteDoneHandler(int statloc
) { 
1393     int exitcode 
= WEXITSTATUS(statloc
); 
1394     int bysignal 
= WIFSIGNALED(statloc
); 
1396     if (!bysignal 
&& exitcode 
== 0) { 
1400         redisLog(REDIS_NOTICE
, 
1401             "Background append only file rewriting terminated with success"); 
1402         /* Now it's time to flush the differences accumulated by the parent */ 
1403         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1404         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1406             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1409         /* Flush our data... */ 
1410         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1411                 (signed) sdslen(server
.bgrewritebuf
)) { 
1412             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1416         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1417         /* Now our work is to rename the temp file into the stable file. And 
1418          * switch the file descriptor used by the server for append only. */ 
1419         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1420             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1424         /* Mission completed... almost */ 
1425         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1426         if (server
.appendfd 
!= -1) { 
1427             /* If append only is actually enabled... */ 
1428             close(server
.appendfd
); 
1429             server
.appendfd 
= fd
; 
1430             if (server
.appendfsync 
!= APPENDFSYNC_NO
) aof_fsync(fd
); 
1431             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1432             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1434             /* If append only is disabled we just generate a dump in this 
1435              * format. Why not? */ 
1438     } else if (!bysignal 
&& exitcode 
!= 0) { 
1439         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1441         redisLog(REDIS_WARNING
, 
1442             "Background append only file rewriting terminated by signal %d", 
1446     sdsfree(server
.bgrewritebuf
); 
1447     server
.bgrewritebuf 
= sdsempty(); 
1448     aofRemoveTempFile(server
.bgrewritechildpid
); 
1449     server
.bgrewritechildpid 
= -1; 
1452 /* This function is called once a background process of some kind terminates, 
1453  * as we want to avoid resizing the hash tables when there is a child in order 
1454  * to play well with copy-on-write (otherwise when a resize happens lots of 
1455  * memory pages are copied). The goal of this function is to update the ability 
1456  * for dict.c to resize the hash tables according to whether or not we have 
1457  * running children. */ 
1458 static void updateDictResizePolicy(void) { 
1459     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) 
1462         dictDisableResize(); 
1465 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1466     int j
, loops 
= server
.cronloops
++; 
1467     REDIS_NOTUSED(eventLoop
); 
1469     REDIS_NOTUSED(clientData
); 
1471     /* We take a cached value of the unix time in the global state because 
1472      * with virtual memory and aging there is the need to store the current time 
1473      * in objects at every object access, and accuracy is not needed. 
1474      * To access a global var is faster than calling time(NULL) */ 
1475     server
.unixtime 
= time(NULL
); 
1476     /* We have just 21 bits per object for LRU information. 
1477      * So we use an (eventually wrapping) LRU clock with minutes resolution. 
1479      * When we need to select what object to swap, we compute the minimum 
1480      * time distance between the current lruclock and the object last access 
1481      * lruclock info. Even if clocks will wrap on overflow, there is 
1482      * the interesting property that we are sure that at least 
1483      * ABS(A-B) minutes passed between current time and timestamp B. 
1485      * This is not precise, but we don't need precision at all, just 
1486      * something statistically reasonable. 
1488     server
.lruclock 
= (time(NULL
)/60)&((1<<21)-1); 
1490     /* We received a SIGTERM, shutting down here in a safe way, as it is 
1491      * not ok doing so inside the signal handler. */ 
1492     if (server
.shutdown_asap
) { 
1493         if (prepareForShutdown() == REDIS_OK
) exit(0); 
1494         redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information"); 
1497     /* Show some info about non-empty databases */ 
1498     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1499         long long size
, used
, vkeys
; 
1501         size 
= dictSlots(server
.db
[j
].dict
); 
1502         used 
= dictSize(server
.db
[j
].dict
); 
1503         vkeys 
= dictSize(server
.db
[j
].expires
); 
1504         if (!(loops 
% 50) && (used 
|| vkeys
)) { 
1505             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1506             /* dictPrintStats(server.dict); */ 
1510     /* We don't want to resize the hash tables while a background saving 
1511      * is in progress: the saving child is created using fork() that is 
1512      * implemented with a copy-on-write semantic in most modern systems, so 
1513      * if we resize the HT while there is the saving child at work actually 
1514      * a lot of memory movements in the parent will cause a lot of pages 
1516     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) { 
1517         if (!(loops 
% 10)) tryResizeHashTables(); 
1518         if (server
.activerehashing
) incrementallyRehash(); 
1521     /* Show information about connected clients */ 
1522     if (!(loops 
% 50)) { 
1523         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use", 
1524             listLength(server
.clients
)-listLength(server
.slaves
), 
1525             listLength(server
.slaves
), 
1526             zmalloc_used_memory()); 
1529     /* Close connections of timedout clients */ 
1530     if ((server
.maxidletime 
&& !(loops 
% 100)) || server
.blpop_blocked_clients
) 
1531         closeTimedoutClients(); 
1533     /* Check if a background saving or AOF rewrite in progress terminated */ 
1534     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1538         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1539             if (pid 
== server
.bgsavechildpid
) { 
1540                 backgroundSaveDoneHandler(statloc
); 
1542                 backgroundRewriteDoneHandler(statloc
); 
1544             updateDictResizePolicy(); 
1547         /* If there is not a background saving in progress check if 
1548          * we have to save now */ 
1549          time_t now 
= time(NULL
); 
1550          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1551             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1553             if (server
.dirty 
>= sp
->changes 
&& 
1554                 now
-server
.lastsave 
> sp
->seconds
) { 
1555                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1556                     sp
->changes
, sp
->seconds
); 
1557                 rdbSaveBackground(server
.dbfilename
); 
1563     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1564      * will use few CPU cycles if there are few expiring keys, otherwise 
1565      * it will get more aggressive to avoid that too much memory is used by 
1566      * keys that can be removed from the keyspace. */ 
1567     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1569         redisDb 
*db 
= server
.db
+j
; 
1571         /* Continue to expire if at the end of the cycle more than 25% 
1572          * of the keys were expired. */ 
1574             long num 
= dictSize(db
->expires
); 
1575             time_t now 
= time(NULL
); 
1578             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1579                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1584                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1585                 t 
= (time_t) dictGetEntryVal(de
); 
1587                     sds key 
= dictGetEntryKey(de
); 
1588                     robj 
*keyobj 
= createStringObject(key
,sdslen(key
)); 
1590                     dbDelete(db
,keyobj
); 
1591                     decrRefCount(keyobj
); 
1593                     server
.stat_expiredkeys
++; 
1596         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1599     /* Swap a few keys on disk if we are over the memory limit and VM 
1600      * is enabled. Try to free objects from the free list first. */ 
1601     if (vmCanSwapOut()) { 
1602         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1603                 server
.vm_max_memory
) 
1607             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1608             retval 
= (server
.vm_max_threads 
== 0) ? 
1609                         vmSwapOneObjectBlocking() : 
1610                         vmSwapOneObjectThreaded(); 
1611             if (retval 
== REDIS_ERR 
&& !(loops 
% 300) && 
1612                 zmalloc_used_memory() > 
1613                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1615                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1617             /* Note that when using threaded I/O we free just one object, 
1618              * because when the I/O thread in charge of swapping this 
1619              * object out finishes, the handler of completed jobs 
1620              * will try to swap more objects if we are still out of memory. */ 
1621             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1625     /* Check if we should connect to a MASTER */ 
1626     if (server
.replstate 
== REDIS_REPL_CONNECT 
&& !(loops 
% 10)) { 
1627         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1628         if (syncWithMaster() == REDIS_OK
) { 
1629             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1630             if (server
.appendonly
) rewriteAppendOnlyFileBackground(); 
1636 /* This function gets called every time Redis is entering the 
1637  * main loop of the event driven library, that is, before sleeping 
1638  * while waiting for ready file descriptors. */ 
1639 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1640     REDIS_NOTUSED(eventLoop
); 
1642     /* Awake clients that got all the swapped keys they requested */ 
1643     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1647         listRewind(server
.io_ready_clients
,&li
); 
1648         while((ln 
= listNext(&li
))) { 
1649             redisClient 
*c 
= ln
->value
; 
1650             struct redisCommand 
*cmd
; 
1652             /* Resume the client. */ 
1653             listDelNode(server
.io_ready_clients
,ln
); 
1654             c
->flags 
&= (~REDIS_IO_WAIT
); 
1655             server
.vm_blocked_clients
--; 
1656             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1657                 readQueryFromClient
, c
); 
1658             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1659             assert(cmd 
!= NULL
); 
1662             /* There may be more data to process in the input buffer. */ 
1663             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1664                 processInputBuffer(c
); 
1667     /* Write the AOF buffer on disk */ 
1668     flushAppendOnlyFile(); 
1671 static void createSharedObjects(void) { 
1674     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1675     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1676     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1677     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1678     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1679     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1680     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1681     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1682     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1683     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1684     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1685     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1686         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1687     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1688         "-ERR no such key\r\n")); 
1689     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1690         "-ERR syntax error\r\n")); 
1691     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1692         "-ERR source and destination objects are the same\r\n")); 
1693     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1694         "-ERR index out of range\r\n")); 
1695     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1696     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1697     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1698     shared
.select0 
= createStringObject("select 0\r\n",10); 
1699     shared
.select1 
= createStringObject("select 1\r\n",10); 
1700     shared
.select2 
= createStringObject("select 2\r\n",10); 
1701     shared
.select3 
= createStringObject("select 3\r\n",10); 
1702     shared
.select4 
= createStringObject("select 4\r\n",10); 
1703     shared
.select5 
= createStringObject("select 5\r\n",10); 
1704     shared
.select6 
= createStringObject("select 6\r\n",10); 
1705     shared
.select7 
= createStringObject("select 7\r\n",10); 
1706     shared
.select8 
= createStringObject("select 8\r\n",10); 
1707     shared
.select9 
= createStringObject("select 9\r\n",10); 
1708     shared
.messagebulk 
= createStringObject("$7\r\nmessage\r\n",13); 
1709     shared
.pmessagebulk 
= createStringObject("$8\r\npmessage\r\n",14); 
1710     shared
.subscribebulk 
= createStringObject("$9\r\nsubscribe\r\n",15); 
1711     shared
.unsubscribebulk 
= createStringObject("$11\r\nunsubscribe\r\n",18); 
1712     shared
.psubscribebulk 
= createStringObject("$10\r\npsubscribe\r\n",17); 
1713     shared
.punsubscribebulk 
= createStringObject("$12\r\npunsubscribe\r\n",19); 
1714     shared
.mbulk3 
= createStringObject("*3\r\n",4); 
1715     shared
.mbulk4 
= createStringObject("*4\r\n",4); 
1716     for (j 
= 0; j 
< REDIS_SHARED_INTEGERS
; j
++) { 
1717         shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
); 
1718         shared
.integers
[j
]->encoding 
= REDIS_ENCODING_INT
; 
1722 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1723     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1724     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1725     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1726     server
.saveparamslen
++; 
1729 static void resetServerSaveParams() { 
1730     zfree(server
.saveparams
); 
1731     server
.saveparams 
= NULL
; 
1732     server
.saveparamslen 
= 0; 
1735 static void initServerConfig() { 
1736     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1737     server
.port 
= REDIS_SERVERPORT
; 
1738     server
.verbosity 
= REDIS_VERBOSE
; 
1739     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1740     server
.saveparams 
= NULL
; 
1741     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1742     server
.bindaddr 
= NULL
; 
1743     server
.glueoutputbuf 
= 1; 
1744     server
.daemonize 
= 0; 
1745     server
.appendonly 
= 0; 
1746     server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1747     server
.no_appendfsync_on_rewrite 
= 0; 
1748     server
.lastfsync 
= time(NULL
); 
1749     server
.appendfd 
= -1; 
1750     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1751     server
.pidfile 
= zstrdup("/var/run/redis.pid"); 
1752     server
.dbfilename 
= zstrdup("dump.rdb"); 
1753     server
.appendfilename 
= zstrdup("appendonly.aof"); 
1754     server
.requirepass 
= NULL
; 
1755     server
.rdbcompression 
= 1; 
1756     server
.activerehashing 
= 1; 
1757     server
.maxclients 
= 0; 
1758     server
.blpop_blocked_clients 
= 0; 
1759     server
.maxmemory 
= 0; 
1760     server
.vm_enabled 
= 0; 
1761     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1762     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1763     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1764     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1765     server
.vm_max_threads 
= 4; 
1766     server
.vm_blocked_clients 
= 0; 
1767     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1768     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
1769     server
.list_max_ziplist_entries 
= REDIS_LIST_MAX_ZIPLIST_ENTRIES
; 
1770     server
.list_max_ziplist_value 
= REDIS_LIST_MAX_ZIPLIST_VALUE
; 
1771     server
.shutdown_asap 
= 0; 
1773     resetServerSaveParams(); 
1775     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1776     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1777     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1778     /* Replication related */ 
1780     server
.masterauth 
= NULL
; 
1781     server
.masterhost 
= NULL
; 
1782     server
.masterport 
= 6379; 
1783     server
.master 
= NULL
; 
1784     server
.replstate 
= REDIS_REPL_NONE
; 
1786     /* Double constants initialization */ 
1788     R_PosInf 
= 1.0/R_Zero
; 
1789     R_NegInf 
= -1.0/R_Zero
; 
1790     R_Nan 
= R_Zero
/R_Zero
; 
1793 static void initServer() { 
1796     signal(SIGHUP
, SIG_IGN
); 
1797     signal(SIGPIPE
, SIG_IGN
); 
1798     setupSigSegvAction(); 
1800     server
.devnull 
= fopen("/dev/null","w"); 
1801     if (server
.devnull 
== NULL
) { 
1802         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1805     server
.clients 
= listCreate(); 
1806     server
.slaves 
= listCreate(); 
1807     server
.monitors 
= listCreate(); 
1808     server
.objfreelist 
= listCreate(); 
1809     createSharedObjects(); 
1810     server
.el 
= aeCreateEventLoop(); 
1811     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1812     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1813     if (server
.fd 
== -1) { 
1814         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1817     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1818         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1819         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1820         server
.db
[j
].blocking_keys 
= dictCreate(&keylistDictType
,NULL
); 
1821         server
.db
[j
].watched_keys 
= dictCreate(&keylistDictType
,NULL
); 
1822         if (server
.vm_enabled
) 
1823             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1824         server
.db
[j
].id 
= j
; 
1826     server
.pubsub_channels 
= dictCreate(&keylistDictType
,NULL
); 
1827     server
.pubsub_patterns 
= listCreate(); 
1828     listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
); 
1829     listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
); 
1830     server
.cronloops 
= 0; 
1831     server
.bgsavechildpid 
= -1; 
1832     server
.bgrewritechildpid 
= -1; 
1833     server
.bgrewritebuf 
= sdsempty(); 
1834     server
.aofbuf 
= sdsempty(); 
1835     server
.lastsave 
= time(NULL
); 
1837     server
.stat_numcommands 
= 0; 
1838     server
.stat_numconnections 
= 0; 
1839     server
.stat_expiredkeys 
= 0; 
1840     server
.stat_starttime 
= time(NULL
); 
1841     server
.unixtime 
= time(NULL
); 
1842     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1843     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1844         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1846     if (server
.appendonly
) { 
1847         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1848         if (server
.appendfd 
== -1) { 
1849             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1855     if (server
.vm_enabled
) vmInit(); 
1858 /* Empty the whole database */ 
1859 static long long emptyDb() { 
1861     long long removed 
= 0; 
1863     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1864         removed 
+= dictSize(server
.db
[j
].dict
); 
1865         dictEmpty(server
.db
[j
].dict
); 
1866         dictEmpty(server
.db
[j
].expires
); 
/* Convert a yes/no string to an integer.
 *
 * The comparison is case-insensitive: "yes" gives 1 and "no" gives 0.
 * Anything else gives -1, which is the value the configuration loader
 * checks for in order to reject the argument. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1877 /* I agree, this is a very rudimentary way to load a configuration... 
1878    will improve later if the config gets more complex */ 
1879 static void loadServerConfig(char *filename
) { 
1881     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1885     if (filename
[0] == '-' && filename
[1] == '\0') 
1888         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1889             redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
); 
1894     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1900         line 
= sdstrim(line
," \t\r\n"); 
1902         /* Skip comments and blank lines*/ 
1903         if (line
[0] == '#' || line
[0] == '\0') { 
1908         /* Split into arguments */ 
1909         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1910         sdstolower(argv
[0]); 
1912         /* Execute config directives */ 
1913         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1914             server
.maxidletime 
= atoi(argv
[1]); 
1915             if (server
.maxidletime 
< 0) { 
1916                 err 
= "Invalid timeout value"; goto loaderr
; 
1918         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1919             server
.port 
= atoi(argv
[1]); 
1920             if (server
.port 
< 1 || server
.port 
> 65535) { 
1921                 err 
= "Invalid port"; goto loaderr
; 
1923         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1924             server
.bindaddr 
= zstrdup(argv
[1]); 
1925         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1926             int seconds 
= atoi(argv
[1]); 
1927             int changes 
= atoi(argv
[2]); 
1928             if (seconds 
< 1 || changes 
< 0) { 
1929                 err 
= "Invalid save parameters"; goto loaderr
; 
1931             appendServerSaveParams(seconds
,changes
); 
1932         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1933             if (chdir(argv
[1]) == -1) { 
1934                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1935                     argv
[1], strerror(errno
)); 
1938         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1939             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1940             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1941             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1942             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1944                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1947         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1950             server
.logfile 
= zstrdup(argv
[1]); 
1951             if (!strcasecmp(server
.logfile
,"stdout")) { 
1952                 zfree(server
.logfile
); 
1953                 server
.logfile 
= NULL
; 
1955             if (server
.logfile
) { 
1956                 /* Test if we are able to open the file. The server will not 
1957                  * be able to abort just for this problem later... */ 
1958                 logfp 
= fopen(server
.logfile
,"a"); 
1959                 if (logfp 
== NULL
) { 
1960                     err 
= sdscatprintf(sdsempty(), 
1961                         "Can't open the log file: %s", strerror(errno
)); 
1966         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1967             server
.dbnum 
= atoi(argv
[1]); 
1968             if (server
.dbnum 
< 1) { 
1969                 err 
= "Invalid number of databases"; goto loaderr
; 
1971         } else if (!strcasecmp(argv
[0],"include") && argc 
== 2) { 
1972             loadServerConfig(argv
[1]); 
1973         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1974             server
.maxclients 
= atoi(argv
[1]); 
1975         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1976             server
.maxmemory 
= memtoll(argv
[1],NULL
); 
1977         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1978             server
.masterhost 
= sdsnew(argv
[1]); 
1979             server
.masterport 
= atoi(argv
[2]); 
1980             server
.replstate 
= REDIS_REPL_CONNECT
; 
1981         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1982                 server
.masterauth 
= zstrdup(argv
[1]); 
1983         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1984             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1985                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1987         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1988             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1989                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1991         } else if (!strcasecmp(argv
[0],"activerehashing") && argc 
== 2) { 
1992             if ((server
.activerehashing 
= yesnotoi(argv
[1])) == -1) { 
1993                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1995         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1996             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1997                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1999         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
2000             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
2001                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
2003         } else if (!strcasecmp(argv
[0],"appendfilename") && argc 
== 2) { 
2004             zfree(server
.appendfilename
); 
2005             server
.appendfilename 
= zstrdup(argv
[1]); 
2006         } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite") 
2008             if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) { 
2009                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
2011         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
2012             if (!strcasecmp(argv
[1],"no")) { 
2013                 server
.appendfsync 
= APPENDFSYNC_NO
; 
2014             } else if (!strcasecmp(argv
[1],"always")) { 
2015                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
2016             } else if (!strcasecmp(argv
[1],"everysec")) { 
2017                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
2019                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
2022         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
2023             server
.requirepass 
= zstrdup(argv
[1]); 
2024         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
2025             zfree(server
.pidfile
); 
2026             server
.pidfile 
= zstrdup(argv
[1]); 
2027         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
2028             zfree(server
.dbfilename
); 
2029             server
.dbfilename 
= zstrdup(argv
[1]); 
2030         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
2031             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
2032                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
2034         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
2035             zfree(server
.vm_swap_file
); 
2036             server
.vm_swap_file 
= zstrdup(argv
[1]); 
2037         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
2038             server
.vm_max_memory 
= memtoll(argv
[1],NULL
); 
2039         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
2040             server
.vm_page_size 
= memtoll(argv
[1], NULL
); 
2041         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
2042             server
.vm_pages 
= memtoll(argv
[1], NULL
); 
2043         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
2044             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
2045         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
2046             server
.hash_max_zipmap_entries 
= memtoll(argv
[1], NULL
); 
2047         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
2048             server
.hash_max_zipmap_value 
= memtoll(argv
[1], NULL
); 
2049         } else if (!strcasecmp(argv
[0],"list-max-ziplist-entries") && argc 
== 2){ 
2050             server
.list_max_ziplist_entries 
= memtoll(argv
[1], NULL
); 
2051         } else if (!strcasecmp(argv
[0],"list-max-ziplist-value") && argc 
== 2){ 
2052             server
.list_max_ziplist_value 
= memtoll(argv
[1], NULL
); 
2054             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
2056         for (j 
= 0; j 
< argc
; j
++) 
2061     if (fp 
!= stdin
) fclose(fp
); 
2065     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
2066     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
2067     fprintf(stderr
, ">>> '%s'\n", line
); 
2068     fprintf(stderr
, "%s\n", err
); 
2072 static void freeClientArgv(redisClient 
*c
) { 
2075     for (j 
= 0; j 
< c
->argc
; j
++) 
2076         decrRefCount(c
->argv
[j
]); 
2077     for (j 
= 0; j 
< c
->mbargc
; j
++) 
2078         decrRefCount(c
->mbargv
[j
]); 
2083 static void freeClient(redisClient 
*c
) { 
2086     /* Note that if the client we are freeing is blocked in a blocking 
2087      * call, we have to set querybuf to NULL *before* calling 
2088      * unblockClientWaitingData(), so that processInputBuffer() does not 
2089      * get called. Also it is important to remove the file events after 
2090      * this, because this call adds the READABLE event. */ 
2091     sdsfree(c
->querybuf
); 
2093     if (c
->flags 
& REDIS_BLOCKED
) 
2094         unblockClientWaitingData(c
); 
2096     /* UNWATCH all the keys */ 
2098     listRelease(c
->watched_keys
); 
2099     /* Unsubscribe from all the pubsub channels */ 
2100     pubsubUnsubscribeAllChannels(c
,0); 
2101     pubsubUnsubscribeAllPatterns(c
,0); 
2102     dictRelease(c
->pubsub_channels
); 
2103     listRelease(c
->pubsub_patterns
); 
2104     /* Obvious cleanup */ 
2105     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
2106     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2107     listRelease(c
->reply
); 
2110     /* Remove from the list of clients */ 
2111     ln 
= listSearchKey(server
.clients
,c
); 
2112     redisAssert(ln 
!= NULL
); 
2113     listDelNode(server
.clients
,ln
); 
2114     /* Remove from the list of clients that are now ready to be restarted 
2115      * after waiting for swapped keys */ 
2116     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
2117         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
2119             listDelNode(server
.io_ready_clients
,ln
); 
2120             server
.vm_blocked_clients
--; 
2123     /* Remove from the list of clients waiting for swapped keys */ 
2124     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
2125         ln 
= listFirst(c
->io_keys
); 
2126         dontWaitForSwappedKey(c
,ln
->value
); 
2128     listRelease(c
->io_keys
); 
2129     /* Master/slave cleanup */ 
2130     if (c
->flags 
& REDIS_SLAVE
) { 
2131         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
2133         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
2134         ln 
= listSearchKey(l
,c
); 
2135         redisAssert(ln 
!= NULL
); 
2138     if (c
->flags 
& REDIS_MASTER
) { 
2139         server
.master 
= NULL
; 
2140         server
.replstate 
= REDIS_REPL_CONNECT
; 
2142     /* Release memory */ 
2145     freeClientMultiState(c
); 
2149 #define GLUEREPLY_UP_TO (1024) 
2150 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
2152     char buf
[GLUEREPLY_UP_TO
]; 
2157     listRewind(c
->reply
,&li
); 
2158     while((ln 
= listNext(&li
))) { 
2162         objlen 
= sdslen(o
->ptr
); 
2163         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
2164             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
2166             listDelNode(c
->reply
,ln
); 
2168             if (copylen 
== 0) return; 
2172     /* Now the output buffer is empty, add the new single element */ 
2173     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
2174     listAddNodeHead(c
->reply
,o
); 
2177 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2178     redisClient 
*c 
= privdata
; 
2179     int nwritten 
= 0, totwritten 
= 0, objlen
; 
2182     REDIS_NOTUSED(mask
); 
2184     /* Use writev() if we have enough buffers to send */ 
2185     if (!server
.glueoutputbuf 
&& 
2186         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&& 
2187         !(c
->flags 
& REDIS_MASTER
)) 
2189         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
2193     while(listLength(c
->reply
)) { 
2194         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
2195             glueReplyBuffersIfNeeded(c
); 
2197         o 
= listNodeValue(listFirst(c
->reply
)); 
2198         objlen 
= sdslen(o
->ptr
); 
2201             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2205         if (c
->flags 
& REDIS_MASTER
) { 
2206             /* Don't reply to a master */ 
2207             nwritten 
= objlen 
- c
->sentlen
; 
2209             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
2210             if (nwritten 
<= 0) break; 
2212         c
->sentlen 
+= nwritten
; 
2213         totwritten 
+= nwritten
; 
2214         /* If we fully sent the object on head go to the next one */ 
2215         if (c
->sentlen 
== objlen
) { 
2216             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2219         /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT 
2220          * bytes: in a single threaded server it's a good idea to serve 
2221          * other clients as well, even if a very large request comes from 
2222          * a super fast link that is always able to accept data (as a real 
2223          * world scenario think about 'KEYS *' against the loopback interface) */ 
2224         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
2226     if (nwritten 
== -1) { 
2227         if (errno 
== EAGAIN
) { 
2230             redisLog(REDIS_VERBOSE
, 
2231                 "Error writing to client: %s", strerror(errno
)); 
2236     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
2237     if (listLength(c
->reply
) == 0) { 
2239         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2243 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
2245     redisClient 
*c 
= privdata
; 
2246     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
2248     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
2249     int offset
, ion 
= 0; 
2251     REDIS_NOTUSED(mask
); 
2254     while (listLength(c
->reply
)) { 
2255         offset 
= c
->sentlen
; 
2259         /* fill-in the iov[] array */ 
2260         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
2261             o 
= listNodeValue(node
); 
2262             objlen 
= sdslen(o
->ptr
); 
2264             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
) 
2267             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
2268                 break; /* no more iovecs */ 
2270             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
2271             iov
[ion
].iov_len 
= objlen 
- offset
; 
2272             willwrite 
+= objlen 
- offset
; 
2273             offset 
= 0; /* just for the first item */ 
2280         /* write all collected blocks at once */ 
2281         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
2282             if (errno 
!= EAGAIN
) { 
2283                 redisLog(REDIS_VERBOSE
, 
2284                          "Error writing to client: %s", strerror(errno
)); 
2291         totwritten 
+= nwritten
; 
2292         offset 
= c
->sentlen
; 
2294         /* remove written robjs from c->reply */ 
2295         while (nwritten 
&& listLength(c
->reply
)) { 
2296             o 
= listNodeValue(listFirst(c
->reply
)); 
2297             objlen 
= sdslen(o
->ptr
); 
2299             if(nwritten 
>= objlen 
- offset
) { 
2300                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
2301                 nwritten 
-= objlen 
- offset
; 
2305                 c
->sentlen 
+= nwritten
; 
2313         c
->lastinteraction 
= time(NULL
); 
2315     if (listLength(c
->reply
) == 0) { 
2317         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2321 static int qsortRedisCommands(const void *r1
, const void *r2
) { 
2323         ((struct redisCommand
*)r1
)->name
, 
2324         ((struct redisCommand
*)r2
)->name
); 
2327 static void sortCommandTable() { 
2328     /* Copy and sort the read-only version of the command table */ 
2329     commandTable 
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
)); 
2330     memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
)); 
2332         sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
), 
2333         sizeof(struct redisCommand
),qsortRedisCommands
); 
2336 static struct redisCommand 
*lookupCommand(char *name
) { 
2337     struct redisCommand tmp 
= {name
,NULL
,0,0,NULL
,0,0,0}; 
2341         sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
), 
2342         sizeof(struct redisCommand
), 
2343         qsortRedisCommands
); 
2346 /* resetClient prepare the client to process the next command */ 
2347 static void resetClient(redisClient 
*c
) { 
2353 /* Call() is the core of Redis execution of a command */ 
2354 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2357     dirty 
= server
.dirty
; 
2359     dirty 
= server
.dirty
-dirty
; 
2361     if (server
.appendonly 
&& dirty
) 
2362         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2363     if ((dirty 
|| cmd
->flags 
& REDIS_CMD_FORCE_REPLICATION
) && 
2364         listLength(server
.slaves
)) 
2365         replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
); 
2366     if (listLength(server
.monitors
)) 
2367         replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
); 
2368     server
.stat_numcommands
++; 
2371 /* If this function gets called we already read a whole 
2372  * command, arguments are in the client argv/argc fields. 
2373  * processCommand() executes the command or prepares the 
2374  * server for a bulk read from the client. 
2376  * If 1 is returned the client is still alive and valid, 
2377  * and other operations can be performed by the caller. Otherwise 
2378  * if 0 is returned the client was destroyed (i.e. after QUIT). */ 
2379 static int processCommand(redisClient 
*c
) { 
2380     struct redisCommand 
*cmd
; 
2382     /* Free some memory if needed (maxmemory setting) */ 
2383     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2385     /* Handle the multi bulk command type. This is an alternative protocol 
2386      * supported by Redis in order to receive commands that are composed of 
2387      * multiple binary-safe "bulk" arguments. The latency of processing is 
2388      * a bit higher but this allows things like multi-sets, so if this 
2389      * protocol is used only for MSET and similar commands this is a big win. */ 
2390     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2391         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2392         if (c
->multibulk 
<= 0) { 
2396             decrRefCount(c
->argv
[c
->argc
-1]); 
2400     } else if (c
->multibulk
) { 
2401         if (c
->bulklen 
== -1) { 
2402             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2403                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2407                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2408                 decrRefCount(c
->argv
[0]); 
2409                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2411                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2416                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2420             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2421             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2425             if (c
->multibulk 
== 0) { 
2429                 /* Here we need to swap the multi-bulk argc/argv with the 
2430                  * normal argc/argv of the client structure. */ 
2432                 c
->argv 
= c
->mbargv
; 
2433                 c
->mbargv 
= auxargv
; 
2436                 c
->argc 
= c
->mbargc
; 
2437                 c
->mbargc 
= auxargc
; 
2439                 /* We need to set bulklen to something different than -1 
2440                  * in order for the code below to process the command without 
2441                  * to try to read the last argument of a bulk command as 
2442                  * a special argument. */ 
2444                 /* continue below and process the command */ 
2451     /* -- end of multi bulk commands processing -- */ 
2453     /* The QUIT command is handled as a special case. Normal command 
2454      * procs are unable to close the client connection safely */ 
2455     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2460     /* Now lookup the command and check ASAP about trivial error conditions 
2461      * such wrong arity, bad command name and so forth. */ 
2462     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2465             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2466                 (char*)c
->argv
[0]->ptr
)); 
2469     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2470                (c
->argc 
< -cmd
->arity
)) { 
2472             sdscatprintf(sdsempty(), 
2473                 "-ERR wrong number of arguments for '%s' command\r\n", 
2477     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2478         /* This is a bulk command, we have to read the last argument yet. */ 
2479         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2481         decrRefCount(c
->argv
[c
->argc
-1]); 
2482         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2484             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2489         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2490         /* It is possible that the bulk read is already in the 
2491          * buffer. Check this condition and handle it accordingly. 
2492          * This is just a fast path, alternative to call processInputBuffer(). 
2493          * It's a good idea since the code is small and this condition 
2494          * happens most of the times. */ 
2495         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2496             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2498             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2500             /* Otherwise return... there is to read the last argument 
2501              * from the socket. */ 
2505     /* Let's try to encode the bulk object to save space. */ 
2506     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2507         c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2509     /* Check if the user is authenticated */ 
2510     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2511         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2516     /* Handle the maxmemory directive */ 
2517     if (server
.maxmemory 
&& (cmd
->flags 
& REDIS_CMD_DENYOOM
) && 
2518         zmalloc_used_memory() > server
.maxmemory
) 
2520         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2525     /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ 
2526     if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0) 
2528         cmd
->proc 
!= subscribeCommand 
&& cmd
->proc 
!= unsubscribeCommand 
&& 
2529         cmd
->proc 
!= psubscribeCommand 
&& cmd
->proc 
!= punsubscribeCommand
) { 
2530         addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n")); 
2535     /* Exec the command */ 
2536     if (c
->flags 
& REDIS_MULTI 
&& 
2537         cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand 
&& 
2538         cmd
->proc 
!= multiCommand 
&& cmd
->proc 
!= watchCommand
) 
2540         queueMultiCommand(c
,cmd
); 
2541         addReply(c
,shared
.queued
); 
2543         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2544             blockClientOnSwappedKeys(c
,cmd
)) return 1; 
2548     /* Prepare the client for the next command */ 
/* Propagate a command to every slave in 'slaves' using the multi bulk
 * protocol.
 *
 * slaves - list of slave clients.
 * dictid - id of the DB the command was executed against; a SELECT is sent
 *          to a slave first when its slaveseldb differs.
 * argv   - command arguments (string objects).
 * argc   - number of arguments.
 *
 * The output vector holds 1 + argc*3 object pointers. It lives in the
 * on-stack static_outv when argc <= REDIS_STATIC_ARGS, otherwise it is
 * zmalloc()ed and zfree()d before returning. Length headers are created with
 * refcount 0, so the incr/decr pair around the slave loop releases them when
 * no slave retained them. */
2553 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
) { 
2558     /* We need 1+(ARGS*3) objects since commands are using the new protocol 
2559      * and we need 1 object for the first "*<count>\r\n" multibulk count, then 
2560      * for every additional object we have "$<count>\r\n" + object + "\r\n". */ 
2561     robj 
*static_outv
[REDIS_STATIC_ARGS
*3+1]; 
2564     if (argc 
<= REDIS_STATIC_ARGS
) { 
2567         outv 
= zmalloc(sizeof(robj
*)*(argc
*3+1)); 
2570     lenobj 
= createObject(REDIS_STRING
, 
2571             sdscatprintf(sdsempty(), "*%d\r\n", argc
)); 
2572     lenobj
->refcount 
= 0; 
2573     outv
[outc
++] = lenobj
; 
2574     for (j 
= 0; j 
< argc
; j
++) { 
2575         lenobj 
= createObject(REDIS_STRING
, 
2576             sdscatprintf(sdsempty(),"$%lu\r\n", 
2577                 (unsigned long) stringObjectLen(argv
[j
]))); 
2578         lenobj
->refcount 
= 0; 
2579         outv
[outc
++] = lenobj
; 
2580         outv
[outc
++] = argv
[j
]; 
2581         outv
[outc
++] = shared
.crlf
; 
2584     /* Increment all the refcounts at start and decrement at end in order to 
2585      * be sure to free objects if there is no slave in a replication state 
2586      * able to be feed with commands */ 
2587     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2588     listRewind(slaves
,&li
); 
2589     while((ln 
= listNext(&li
))) { 
2590         redisClient 
*slave 
= ln
->value
; 
2592         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2593         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2595         /* Feed all the other slaves, MONITORs and so on */ 
2596         if (slave
->slaveseldb 
!= dictid
) { 
2600             case 0: selectcmd 
= shared
.select0
; break; 
2601             case 1: selectcmd 
= shared
.select1
; break; 
2602             case 2: selectcmd 
= shared
.select2
; break; 
2603             case 3: selectcmd 
= shared
.select3
; break; 
2604             case 4: selectcmd 
= shared
.select4
; break; 
2605             case 5: selectcmd 
= shared
.select5
; break; 
2606             case 6: selectcmd 
= shared
.select6
; break; 
2607             case 7: selectcmd 
= shared
.select7
; break; 
2608             case 8: selectcmd 
= shared
.select8
; break; 
2609             case 9: selectcmd 
= shared
.select9
; break; 
2611                 selectcmd 
= createObject(REDIS_STRING
, 
2612                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2613                 selectcmd
->refcount 
= 0; 
2616             addReply(slave
,selectcmd
); 
2617             slave
->slaveseldb 
= dictid
; 
2619         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2621     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2622     if (outv 
!= static_outv
) zfree(outv
); 
/* Append to the sds string 's' a double-quoted, escaped representation of
 * the 'len' bytes starting at 'p', and return the resulting sds (the caller
 * must use the returned pointer, as with the other sdscat* functions).
 *
 * Escaping visible in this block: one kind of byte is written as a backslash
 * followed by the byte itself, newline / carriage return / tab / alert /
 * backspace are written as the two-character sequences \n \r \t \a \b, and
 * the remaining bytes are written either verbatim or as \xHH.
 *
 * The two-character escapes are appended with length 2: with a length of 1
 * only the backslash was emitted and the letter identifying the control
 * character was lost. */
2625 static sds 
sdscatrepr(sds s
, char *p
, size_t len
) { 
2626     s 
= sdscatlen(s
,"\"",1); 
2631             s 
= sdscatprintf(s
,"\\%c",*p
); 
2633         case '\n': s 
= sdscatlen(s
,"\\n",2); break; 
2634         case '\r': s 
= sdscatlen(s
,"\\r",2); break; 
2635         case '\t': s 
= sdscatlen(s
,"\\t",2); break; 
2636         case '\a': s 
= sdscatlen(s
,"\\a",2); break; 
2637         case '\b': s 
= sdscatlen(s
,"\\b",2); break; 
2640                 s 
= sdscatprintf(s
,"%c",*p
); 
2642                 s 
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
); 
2647     return sdscatlen(s
,"\"",1); 
/* Send a textual trace of an executed command to every client in 'monitors'.
 *
 * monitors - list of MONITOR clients.
 * dictid   - DB id; "(db <id>) " is included only when it is not 0.
 * argv     - command arguments; integer encoded ones are printed with %ld,
 *            the others go through sdscatrepr().
 * argc     - number of arguments.
 *
 * The line has the form "+<sec>.<usec> [(db N) ]<args>\r\n". The
 * microseconds field is zero padded to six digits: without padding 5 usec
 * and 500000 usec were both rendered in a way that reads as ".5", making the
 * timestamp ambiguous. A single string object is built, queued to every
 * monitor and then released with decrRefCount(). */
2650 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
) { 
2654     sds cmdrepr 
= sdsnew("+"); 
2658     gettimeofday(&tv
,NULL
); 
2659     cmdrepr 
= sdscatprintf(cmdrepr
,"%ld.%06ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
); 
2660     if (dictid 
!= 0) cmdrepr 
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
); 
2662     for (j 
= 0; j 
< argc
; j
++) { 
2663         if (argv
[j
]->encoding 
== REDIS_ENCODING_INT
) { 
2664             cmdrepr 
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
); 
2666             cmdrepr 
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
, 
2667                         sdslen(argv
[j
]->ptr
)); 
2670             cmdrepr 
= sdscatlen(cmdrepr
," ",1); 
2672     cmdrepr 
= sdscatlen(cmdrepr
,"\r\n",2); 
2673     cmdobj 
= createObject(REDIS_STRING
,cmdrepr
); 
2675     listRewind(monitors
,&li
); 
2676     while((ln 
= listNext(&li
))) { 
2677         redisClient 
*monitor 
= ln
->value
; 
2678         addReply(monitor
,cmdobj
); 
2680     decrRefCount(cmdobj
); 
/* Parse as much of c->querybuf as possible, executing complete commands.
 *
 * With c->bulklen == -1 a newline terminated line is expected: it is cut
 * from the query buffer, split on spaces and the non-empty tokens become
 * c->argv. If no newline is present and the buffer reached
 * REDIS_REQUEST_MAX_SIZE a protocol error is logged. Otherwise (bulklen set)
 * the function waits until c->bulklen bytes are buffered and turns them,
 * minus the trailing CRLF, into the last argument.
 *
 * The optional '\r' before the newline is stripped only when the newline is
 * not the first byte of the buffer: inspecting *(p-1) in that case would read
 * (and possibly overwrite) the byte that precedes the string. */
2683 static void processInputBuffer(redisClient 
*c
) { 
2685     /* Before to process the input buffer, make sure the client is not 
2686      * waiting for a blocking operation such as BLPOP. Note that the first 
2687      * iteration the client is never blocked, otherwise the processInputBuffer 
2688      * would not be called at all, but after the execution of the first commands 
2689      * in the input buffer the client may be blocked, and the "goto again" 
2690      * will try to reiterate. The following line will make it return asap. */ 
2691     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2692     if (c
->bulklen 
== -1) { 
2693         /* Read the first line of the query */ 
2694         char *p 
= strchr(c
->querybuf
,'\n'); 
2701             query 
= c
->querybuf
; 
2702             c
->querybuf 
= sdsempty(); 
2703             querylen 
= 1+(p
-(query
)); 
2704             if (sdslen(query
) > querylen
) { 
2705                 /* leave data after the first line of the query in the buffer */ 
2706                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2708             *p 
= '\0'; /* remove "\n" */ 
2709             if (p 
> query 
&& *(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2710             sdsupdatelen(query
); 
2712             /* Now we can split the query in arguments */ 
2713             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2716             if (c
->argv
) zfree(c
->argv
); 
2717             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2719             for (j 
= 0; j 
< argc
; j
++) { 
2720                 if (sdslen(argv
[j
])) { 
2721                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2729                 /* Execute the command. If the client is still valid 
2730                  * after processCommand() return and there is something 
2731                  * on the query buffer try to process the next command. */ 
2732                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2734                 /* Nothing to process, argc == 0. Just process the query 
2735                  * buffer if it's not empty or return to the caller */ 
2736                 if (sdslen(c
->querybuf
)) goto again
; 
2739         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2740             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2745         /* Bulk read handling. Note that if we are at this point 
2746            the client already sent a command terminated with a newline, 
2747            we are reading the bulk data that is actually the last 
2748            argument of the command. */ 
2749         int qbl 
= sdslen(c
->querybuf
); 
2751         if (c
->bulklen 
<= qbl
) { 
2752             /* Copy everything but the final CRLF as final argument */ 
2753             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2755             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2756             /* Process the command. If the client is still valid after 
2757              * the processing and there is more data in the buffer 
2758              * try to parse it. */ 
2759             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
/* Readable-event handler for a client socket.
 *
 * el, mask - event loop arguments ('mask' is explicitly marked unused).
 * fd       - client socket to read from.
 * privdata - the redisClient registered with the event.
 *
 * Up to REDIS_IOBUF_LEN bytes are read into a stack buffer. The error path
 * distinguishes errno == EAGAIN from other errors (which are logged); a
 * zero-length read is logged as "Client closed connection"; otherwise the
 * bytes are appended to c->querybuf and c->lastinteraction is refreshed.
 * Finally processInputBuffer(c) is called.
 * NOTE(review): the statements that close/free the client on error or EOF
 * are not visible in this chunk. */
2765 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2766     redisClient 
*c 
= (redisClient
*) privdata
; 
2767     char buf
[REDIS_IOBUF_LEN
]; 
2770     REDIS_NOTUSED(mask
); 
2772     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2774         if (errno 
== EAGAIN
) { 
2777             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2781     } else if (nread 
== 0) { 
2782         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2787         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2788         c
->lastinteraction 
= time(NULL
); 
2792     processInputBuffer(c
); 
/* Make client 'c' operate on database number 'id'.
 * 'id' is checked against the range [0, server.dbnum); when valid, c->db is
 * pointed at &server.db[id].
 * NOTE(review): the return statements are not visible in this chunk. */
2795 static int selectDb(redisClient 
*c
, int id
) { 
2796     if (id 
< 0 || id 
>= server
.dbnum
) 
2798     c
->db 
= &server
.db
[id
]; 
/* Dup method installed on client reply lists (see createClient): instead of
 * copying the value it takes a new reference on the robj 'o'.
 * NOTE(review): the return statement is not visible in this chunk. */
2802 static void *dupClientReplyValue(void *o
) { 
2803     incrRefCount((robj
*)o
); 
/* List match method: returns the result of equalStringObjects(a,b), i.e.
 * non-zero when the two string objects compare equal. */
2807 static int listMatchObjects(void *a
, void *b
) { 
2808     return equalStringObjects(a
,b
); 
/* Allocate and initialise the redisClient for the connected socket 'fd'.
 *
 * anetNonBlock() and anetTcpNoDelay() are applied to fd before the
 * allocation result is checked (neither call uses 'c'); NULL is returned
 * when zmalloc() yields NULL.
 *
 * Containers set up here:
 *  - c->reply: list whose values are released with decrRefCount and
 *    duplicated with dupClientReplyValue;
 *  - c->io_keys: list, values released with decrRefCount;
 *  - c->watched_keys: list;
 *  - c->pubsub_channels: dict of type setDictType;
 *  - c->pubsub_patterns: list, values released with decrRefCount and matched
 *    with listMatchObjects.
 * readQueryFromClient is registered as the AE_READABLE handler for c->fd,
 * the client is appended to server.clients and its MULTI state initialised.
 * NOTE(review): the handling of the AE_ERR branch and the final return are
 * not visible in this chunk. */
2811 static redisClient 
*createClient(int fd
) { 
2812     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2814     anetNonBlock(NULL
,fd
); 
2815     anetTcpNoDelay(NULL
,fd
); 
2816     if (!c
) return NULL
; 
2819     c
->querybuf 
= sdsempty(); 
2828     c
->lastinteraction 
= time(NULL
); 
2829     c
->authenticated 
= 0; 
2830     c
->replstate 
= REDIS_REPL_NONE
; 
2831     c
->reply 
= listCreate(); 
2832     listSetFreeMethod(c
->reply
,decrRefCount
); 
2833     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2834     c
->blocking_keys 
= NULL
; 
2835     c
->blocking_keys_num 
= 0; 
2836     c
->io_keys 
= listCreate(); 
2837     c
->watched_keys 
= listCreate(); 
2838     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2839     c
->pubsub_channels 
= dictCreate(&setDictType
,NULL
); 
2840     c
->pubsub_patterns 
= listCreate(); 
2841     listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
); 
2842     listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
); 
2843     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2844         readQueryFromClient
, c
) == AE_ERR
) { 
2848     listAddNodeTail(server
.clients
,c
); 
2849     initClientMultiState(c
); 
/* Queue the object 'obj' on the reply list of client 'c'.
 *
 * When the reply list is currently empty and the client replication state is
 * REDIS_REPL_NONE or REDIS_REPL_ONLINE, sendReplyToClient is first installed
 * as the AE_WRITABLE handler; if that fails the function returns and the
 * reply is not queued.
 *
 * With VM enabled, an object whose storage is not REDIS_VM_MEMORY is replaced
 * by a duplicate with refcount 0. The value appended to the list is the
 * result of getDecodedObject(obj). */
2853 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2854     if (listLength(c
->reply
) == 0 && 
2855         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2856          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2857         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2858         sendReplyToClient
, c
) == AE_ERR
) return; 
2860     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2861         obj 
= dupStringObject(obj
); 
2862         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2864     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
/* Reply helper taking an sds string: 's' is wrapped into a REDIS_STRING
 * object via createObject(), which stores the pointer it is given.
 * NOTE(review): the queueing of the object and the release of the local
 * reference are not visible in this chunk. */
2867 static void addReplySds(redisClient 
*c
, sds s
) { 
2868     robj 
*o 
= createObject(REDIS_STRING
,s
); 
/* Reply with the double 'd' formatted with "%.17g" and sent in the form
 * "$<len>\r\n<text>\r\n", where <len> is strlen() of the formatted text. */
2873 static void addReplyDouble(redisClient 
*c
, double d
) { 
2876     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2877     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2878         (unsigned long) strlen(buf
),buf
)); 
/* Reply with the integer 'll'.
 * shared.czero and shared.cone are used as pre-built replies (shared.cone
 * when ll == 1). Otherwise the digits are produced by ll2string() starting at
 * buf+1 and len+3 bytes of 'buf' are sent, i.e. one byte before and two bytes
 * after the digits.
 * NOTE(review): the statements filling those three extra bytes (presumably
 * the type prefix and CRLF) are not visible in this chunk. */
2881 static void addReplyLongLong(redisClient 
*c
, long long ll
) { 
2886         addReply(c
,shared
.czero
); 
2888     } else if (ll 
== 1) { 
2889         addReply(c
,shared
.cone
); 
2893     len 
= ll2string(buf
+1,sizeof(buf
)-1,ll
); 
2896     addReplySds(c
,sdsnewlen(buf
,len
+3)); 
/* Reply with the unsigned long 'ul' as ":<value>\r\n".
 * shared.czero and shared.cone are used as pre-built replies (shared.cone
 * when ul == 1); other values are formatted with snprintf() and sent using
 * the length it returns. */
2899 static void addReplyUlong(redisClient 
*c
, unsigned long ul
) { 
2904         addReply(c
,shared
.czero
); 
2906     } else if (ul 
== 1) { 
2907         addReply(c
,shared
.cone
); 
2910     len 
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
); 
2911     addReplySds(c
,sdsnewlen(buf
,len
)); 
/* Send the length header for a bulk reply carrying 'obj'.
 * For raw encoded objects the length is sdslen(obj->ptr); otherwise the
 * value stored in obj->ptr is read as a long and the number of characters of
 * its decimal representation is counted by repeated division by 10.
 * The length is rendered by ll2string() at buf+1, followed by "\r\n", and
 * intlen+3 bytes of 'buf' are sent.
 * NOTE(review): the assignment of buf[0] and the handling of the sign in the
 * digit count are not visible in this chunk. */
2914 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2918     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2919         len 
= sdslen(obj
->ptr
); 
2921         long n 
= (long)obj
->ptr
; 
2923         /* Compute how many bytes will take this integer as a radix 10 string */ 
2929         while((n 
= n
/10) != 0) { 
2934     intlen 
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
); 
2935     buf
[intlen
+1] = '\r'; 
2936     buf
[intlen
+2] = '\n'; 
2937     addReplySds(c
,sdsnewlen(buf
,intlen
+3)); 
/* Send 'obj' as a bulk reply: the length header is emitted with
 * addReplyBulkLen() and the reply is terminated with shared.crlf.
 * NOTE(review): the statement that queues the payload itself between the
 * header and the CRLF is not visible in this chunk. */
2940 static void addReplyBulk(redisClient 
*c
, robj 
*obj
) { 
2941     addReplyBulkLen(c
,obj
); 
2943     addReply(c
,shared
.crlf
); 
/* Bulk reply helper taking an sds string: a new string object is created
 * from the sdslen(s) bytes of 's' with createStringObject().
 * NOTE(review): what happens to 's' and to the temporary object afterwards
 * is not visible in this chunk. */
2946 static void addReplyBulkSds(redisClient 
*c
, sds s
) { 
2947     robj 
*o 
= createStringObject(s
, sdslen(s
)); 
2952 /* In the CONFIG command we need to add vanilla C string as bulk replies.
 * One branch replies with shared.nullbulk; the other builds a string object
 * from the strlen(s) bytes of 's'.
 * NOTE(review): the condition selecting the null bulk branch (presumably
 * s == NULL) is not visible in this chunk. */ 
2953 static void addReplyBulkCString(redisClient 
*c
, char *s
) { 
2955         addReply(c
,shared
.nullbulk
); 
2957         robj 
*o 
= createStringObject(s
,strlen(s
)); 
/* Readable-event handler for the listening socket 'fd'.
 *
 * el, privdata, mask - event loop arguments ('mask' and 'privdata' are
 *                      explicitly marked unused).
 *
 * A connection is accepted with anetAccept(); failures are logged using
 * server.neterr. A client is then created for the new descriptor; if that
 * fails a warning is logged and the descriptor is closed. When
 * server.maxclients is set and the number of clients now exceeds it, a best
 * effort error line is written straight to the socket. Otherwise
 * server.stat_numconnections is incremented.
 * NOTE(review): the early returns and the freeing of the rejected client are
 * not visible in this chunk. */
2963 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2968     REDIS_NOTUSED(mask
); 
2969     REDIS_NOTUSED(privdata
); 
2971     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2972     if (cfd 
== AE_ERR
) { 
2973         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2976     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2977     if ((c 
= createClient(cfd
)) == NULL
) { 
2978         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2979         close(cfd
); /* May be already closed, just ignore errors */ 
2982     /* If maxclient directive is set and this is one client more... close the 
2983      * connection. Note that we create the client instead to check before 
2984      * for this condition, since now the socket is already set in nonblocking 
2985      * mode and we can send an error for free using the Kernel I/O */ 
2986     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2987         char *err 
= "-ERR max number of clients reached\r\n"; 
2989         /* That's a best effort error message, don't check write errors */ 
2990         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2991             /* Nothing to do, Just to avoid the warning... */ 
2996     server
.stat_numconnections
++; 
2999 /* ======================= Redis objects implementation ===================== */ 
/* Create a new object of the given 'type' wrapping 'ptr'.
 *
 * An object is recycled from server.objfreelist when the list is not empty,
 * otherwise a new one is zmalloc()ed. When VM is enabled the free list is
 * accessed under server.obj_freelist_mutex, which is released on both paths
 * before continuing. The encoding is set to REDIS_ENCODING_RAW; with VM
 * enabled the lru field is stamped with server.lruclock and the storage is
 * set to REDIS_VM_MEMORY.
 * NOTE(review): the assignments of type, ptr and refcount and the return are
 * not visible in this chunk. */
3001 static robj 
*createObject(int type
, void *ptr
) { 
3004     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
3005     if (listLength(server
.objfreelist
)) { 
3006         listNode 
*head 
= listFirst(server
.objfreelist
); 
3007         o 
= listNodeValue(head
); 
3008         listDelNode(server
.objfreelist
,head
); 
3009         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3011         if (server
.vm_enabled
) 
3012             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3013         o 
= zmalloc(sizeof(*o
)); 
3016     o
->encoding 
= REDIS_ENCODING_RAW
; 
3019     if (server
.vm_enabled
) { 
3020         /* Note that this code may run in the context of an I/O thread 
3021          * and accessing server.lruclock in theory is an error 
3022          * (no locks). But in practice this is safe, and even if we read 
3023          * garbage Redis will not fail. */ 
3024         o
->lru 
= server
.lruclock
; 
3025         o
->storage 
= REDIS_VM_MEMORY
; 
/* Create a REDIS_STRING object whose payload is a new sds string built with
 * sdsnewlen() from the 'len' bytes at 'ptr'. */
3030 static robj 
*createStringObject(char *ptr
, size_t len
) { 
3031     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
/* Create a string object representing the integer 'value'.
 *  - 0 <= value < REDIS_SHARED_INTEGERS: the matching shared.integers[]
 *    object is used after incrementing its refcount;
 *  - value fits in a long: a REDIS_STRING object with
 *    REDIS_ENCODING_INT is created and the value is stored in o->ptr;
 *  - otherwise: a raw string object built with sdsfromlonglong(). */
3034 static robj 
*createStringObjectFromLongLong(long long value
) { 
3036     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3037         incrRefCount(shared
.integers
[value
]); 
3038         o 
= shared
.integers
[value
]; 
3040         if (value 
>= LONG_MIN 
&& value 
<= LONG_MAX
) { 
3041             o 
= createObject(REDIS_STRING
, NULL
); 
3042             o
->encoding 
= REDIS_ENCODING_INT
; 
3043             o
->ptr 
= (void*)((long)value
); 
3045             o 
= createObject(REDIS_STRING
,sdsfromlonglong(value
)); 
/* Return a new string object holding a copy of the bytes of 'o'.
 * Only raw encoded objects are accepted (enforced with assert()). */
3051 static robj 
*dupStringObject(robj 
*o
) { 
3052     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
3053     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
/* Create a REDIS_LIST object backed by a linked list (REDIS_ENCODING_LIST).
 * The list releases its values with decrRefCount.
 * NOTE(review): the return statement is not visible in this chunk. */
3056 static robj 
*createListObject(void) { 
3057     list 
*l 
= listCreate(); 
3058     robj 
*o 
= createObject(REDIS_LIST
,l
); 
3059     listSetFreeMethod(l
,decrRefCount
); 
3060     o
->encoding 
= REDIS_ENCODING_LIST
; 
/* Create a REDIS_LIST object backed by a new ziplist
 * (REDIS_ENCODING_ZIPLIST).
 * NOTE(review): the return statement is not visible in this chunk. */
3064 static robj 
*createZiplistObject(void) { 
3065     unsigned char *zl 
= ziplistNew(); 
3066     robj 
*o 
= createObject(REDIS_LIST
,zl
); 
3067     o
->encoding 
= REDIS_ENCODING_ZIPLIST
; 
/* Create a REDIS_SET object backed by a new dict of type setDictType. */
3071 static robj 
*createSetObject(void) { 
3072     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
3073     return createObject(REDIS_SET
,d
); 
/* Create a REDIS_HASH object backed by a new zipmap
 * (REDIS_ENCODING_ZIPMAP).
 * NOTE(review): the return statement is not visible in this chunk. */
3076 static robj 
*createHashObject(void) { 
3077     /* All the Hashes start as zipmaps. Will be automatically converted 
3078      * into hash tables if there are enough elements or big elements. */ 
3080     unsigned char *zm 
= zipmapNew(); 
3081     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
3082     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
/* Create a REDIS_ZSET object: a zset structure is allocated and given a new
 * dict of type zsetDictType plus a new skip list from zslCreate(). */
3086 static robj 
*createZsetObject(void) { 
3087     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
3089     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
3090     zs
->zsl 
= zslCreate(); 
3091     return createObject(REDIS_ZSET
,zs
); 
/* Release the payload of a string object. Only raw encoded objects take the
 * visible branch.
 * NOTE(review): the statement inside the branch (presumably freeing the sds
 * in o->ptr) is not visible in this chunk. */
3094 static void freeStringObject(robj 
*o
) { 
3095     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
/* Release the payload of a list object according to its encoding:
 * REDIS_ENCODING_LIST payloads are released with listRelease(); any
 * unexpected encoding triggers redisPanic().
 * NOTE(review): the body of the REDIS_ENCODING_ZIPLIST case is not visible
 * in this chunk. */
3100 static void freeListObject(robj 
*o
) { 
3101     switch (o
->encoding
) { 
3102     case REDIS_ENCODING_LIST
: 
3103         listRelease((list
*) o
->ptr
); 
3105     case REDIS_ENCODING_ZIPLIST
: 
3109         redisPanic("Unknown list encoding type"); 
/* Release the payload of a set object: o->ptr is released as a dict. */
3113 static void freeSetObject(robj 
*o
) { 
3114     dictRelease((dict
*) o
->ptr
); 
/* Release the payload of a sorted set object; the dict member of the zset
 * is released with dictRelease().
 * NOTE(review): the release of the skip list and of the zset structure
 * itself is not visible in this chunk. */
3117 static void freeZsetObject(robj 
*o
) { 
3120     dictRelease(zs
->dict
); 
/* Release the payload of a hash object according to its encoding:
 * REDIS_ENCODING_HT payloads are released with dictRelease(); any
 * unexpected encoding triggers redisPanic().
 * NOTE(review): the body of the REDIS_ENCODING_ZIPMAP case is not visible in
 * this chunk. */
3125 static void freeHashObject(robj 
*o
) { 
3126     switch (o
->encoding
) { 
3127     case REDIS_ENCODING_HT
: 
3128         dictRelease((dict
*) o
->ptr
); 
3130     case REDIS_ENCODING_ZIPMAP
: 
3134         redisPanic("Unknown hash encoding type"); 
/* Take an additional reference on object 'o'.
 * NOTE(review): only the signature is visible in this chunk. */
3139 static void incrRefCount(robj 
*o
) { 
/* Drop one reference from the object 'obj' (passed as void* so the function
 * can be used as a list free method).
 *
 * With VM enabled, objects whose storage is REDIS_VM_SWAPPED or
 * REDIS_VM_LOADING are handled as vmpointers: a pending load job is
 * cancelled, the swap pages are marked free and
 * server.vm_stats_swapped_objects is decremented.
 *
 * Otherwise a refcount <= 0 triggers redisPanic(); a pending swap-out job is
 * cancelled; and when the refcount reaches 0 the type specific free function
 * runs and the object structure is offered to server.objfreelist (under
 * server.obj_freelist_mutex when VM is enabled) unless the list already holds
 * more than REDIS_OBJFREELIST_MAX entries or the insertion fails.
 * NOTE(review): the statement executed when the object is not recycled is
 * not visible in this chunk. */
3143 static void decrRefCount(void *obj
) { 
3146     /* Object is a swapped out value, or in the process of being loaded. */ 
3147     if (server
.vm_enabled 
&& 
3148         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
3150         vmpointer 
*vp 
= obj
; 
3151         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
); 
3152         vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
3153         server
.vm_stats_swapped_objects
--; 
3158     if (o
->refcount 
<= 0) redisPanic("decrRefCount against refcount <= 0"); 
3159     /* Object is in memory, or in the process of being swapped out. 
3161      * If the object is being swapped out, abort the operation on 
3162      * decrRefCount even if the refcount does not drop to 0: the object 
3163      * is referenced at least two times, as value of the key AND as 
3164      * job->val in the iojob. So if we don't invalidate the iojob, when it is 
3165      * done but the relevant key was removed in the meantime, the 
3166      * complete jobs handler will not find the key about the job and the 
3167      * assert will fail. */ 
3168     if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
3169         vmCancelThreadedIOJob(o
); 
3170     if (--(o
->refcount
) == 0) { 
3172         case REDIS_STRING
: freeStringObject(o
); break; 
3173         case REDIS_LIST
: freeListObject(o
); break; 
3174         case REDIS_SET
: freeSetObject(o
); break; 
3175         case REDIS_ZSET
: freeZsetObject(o
); break; 
3176         case REDIS_HASH
: freeHashObject(o
); break; 
3177         default: redisPanic("Unknown object type"); break; 
3179         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
3180         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
3181             !listAddNodeHead(server
.objfreelist
,o
)) 
3183         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
/* Check that object 'o' has the expected 'type'; on mismatch the shared
 * wrong-type error is queued to client 'c'.
 * NOTE(review): the return values are not visible in this chunk. */
3187 static int checkType(redisClient 
*c
, robj 
*o
, int type
) { 
3188     if (o
->type 
!= type
) { 
3189         addReply(c
,shared
.wrongtypeerr
); 
3195 /* Check if the nul-terminated string 's' can be represented by a long 
3196  * (that is, is a number that fits into long without any other space or 
3197  * character before or after the digits). 
 *
 * The check is a round trip: 's' is parsed with strtol() (trailing garbage
 * is rejected), the result is converted back with ll2string() and the text
 * must match 's' byte for byte and in length.
 *
3199  * If so, the function returns REDIS_OK and *longval is set to the value 
3200  * of the number (only when 'longval' is not NULL). Otherwise REDIS_ERR is
 * returned */ 
3201 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
3202     char buf
[32], *endptr
; 
3206     value 
= strtol(s
, &endptr
, 10); 
3207     if (endptr
[0] != '\0') return REDIS_ERR
; 
3208     slen 
= ll2string(buf
,32,value
); 
3210     /* If the number converted back into a string is not identical 
3211      * then it's not possible to encode the string as integer */ 
3212     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
3213     if (longval
) *longval 
= value
; 
3217 /* Try to encode a string object in order to save space.
 *
 * The object is returned unchanged when it is not raw encoded, when it has
 * more than one reference, or when its content is not representable as a
 * long. Otherwise, for values in [0, REDIS_SHARED_INTEGERS) the matching
 * shared.integers[] object is returned with its refcount incremented; for
 * other values the object is switched to REDIS_ENCODING_INT and the value is
 * stored in o->ptr.
 * NOTE(review): the release of the original object / of its sds payload on
 * the two encoding paths is not visible in this chunk. */ 
3218 static robj 
*tryObjectEncoding(robj 
*o
) { 
3222     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
3223         return o
; /* Already encoded */ 
3225     /* It's not safe to encode shared objects: shared objects can be shared 
3226      * everywhere in the "object space" of Redis. Encoded objects can only 
3227      * appear as "values" (and not, for instance, as keys) */ 
3228      if (o
->refcount 
> 1) return o
; 
3230     /* Currently we try to encode only strings */ 
3231     redisAssert(o
->type 
== REDIS_STRING
); 
3233     /* Check if we can represent this string as a long integer */ 
3234     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
; 
3236     /* Ok, this object can be encoded */ 
3237     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3239         incrRefCount(shared
.integers
[value
]); 
3240         return shared
.integers
[value
]; 
3242         o
->encoding 
= REDIS_ENCODING_INT
; 
3244         o
->ptr 
= (void*) value
; 
3249 /* Get a decoded version of an encoded object (returned as a new object). 
3250  * If the object is already raw-encoded just increment the ref count.
 *
 * Integer encoded strings are rendered with ll2string() into a stack buffer
 * and a new string object is created from that text. Any other
 * type/encoding combination triggers redisPanic(). */ 
3251 static robj 
*getDecodedObject(robj 
*o
) { 
3254     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3258     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
3261         ll2string(buf
,32,(long)o
->ptr
); 
3262         dec 
= createStringObject(buf
,strlen(buf
)); 
3265         redisPanic("Unknown encoding type"); 
3269 /* Compare two string objects via strcmp() or alike. 
3270  * Note that the objects may be integer-encoded. In such a case we 
3271  * use ll2string() to get a string representation of the numbers on the stack 
3272  * and compare the strings, it's much faster than calling getDecodedObject(). 
 *
 * Returns 0 immediately when 'a' and 'b' are the same object; otherwise the
 * result of sdscmp() when 'bothsds' is set, or of strcmp() when it is not.
 * Both objects must be of type REDIS_STRING (asserted).
 *
3274  * Important note: if objects are not integer encoded, but binary-safe strings, 
3275  * sdscmp() from sds.c will apply memcmp() so this function can be considered 
 * binary safe in that case. */ 
3277 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
3278     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
3279     char bufa
[128], bufb
[128], *astr
, *bstr
; 
3282     if (a 
== b
) return 0; 
3283     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
3284         ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
); 
3290     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
3291         ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
); 
3297     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
3300 /* Equal string objects return 1 if the two objects are the same from the 
3301  * point of view of a string comparison, otherwise 0 is returned. Note that 
3302  * this function is faster than checking for (compareStringObject(a,b) == 0) 
3303  * because it can perform some more optimization: when neither object is raw
 * encoded the two ptr fields are compared directly, and only otherwise is
 * compareStringObjects() used. */ 
3304 static int equalStringObjects(robj 
*a
, robj 
*b
) { 
3305     if (a
->encoding 
!= REDIS_ENCODING_RAW 
&& b
->encoding 
!= REDIS_ENCODING_RAW
){ 
3306         return a
->ptr 
== b
->ptr
; 
3308         return compareStringObjects(a
,b
) == 0; 
/* Return the length of the string representation of 'o', which must be a
 * REDIS_STRING (asserted): sdslen() for raw encoded objects, otherwise the
 * value returned by ll2string() when rendering the long stored in o->ptr. */
3312 static size_t stringObjectLen(robj 
*o
) { 
3313     redisAssert(o
->type 
== REDIS_STRING
); 
3314     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3315         return sdslen(o
->ptr
); 
3319         return ll2string(buf
,32,(long)o
->ptr
); 
/* Extract a double from the string object 'o'.
 * Raw encoded strings are parsed with strtod() and REDIS_ERR is returned
 * when characters remain after the number; integer encoded strings use the
 * long stored in o->ptr; any other encoding triggers redisPanic().
 * NOTE(review): strtod() range errors are not checked in the visible code.
 * The handling of a NULL 'o', the store into *target and the success return
 * are not visible in this chunk. */
3323 static int getDoubleFromObject(robj 
*o
, double *target
) { 
3330         redisAssert(o
->type 
== REDIS_STRING
); 
3331         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3332             value 
= strtod(o
->ptr
, &eptr
); 
3333             if (eptr
[0] != '\0') return REDIS_ERR
; 
3334         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3335             value 
= (long)o
->ptr
; 
3337             redisPanic("Unknown string encoding"); 
/* Like getDoubleFromObject(), but on failure an error is queued to client
 * 'c': either "-ERR <msg>" built from 'msg', or the default
 * "-ERR value is not a double".
 * NOTE(review): the condition choosing between the two messages (presumably
 * msg != NULL), the store into *target and the return values are not visible
 * in this chunk. */
3345 static int getDoubleFromObjectOrReply(redisClient 
*c
, robj 
*o
, double *target
, const char *msg
) { 
3347     if (getDoubleFromObject(o
, &value
) != REDIS_OK
) { 
3349             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3351             addReplySds(c
, sdsnew("-ERR value is not a double\r\n")); 
/* Extract a long long from the string object 'o'.
 * Raw encoded strings are parsed with strtoll() in base 10 and REDIS_ERR is
 * returned when characters remain after the number; integer encoded strings
 * use the long stored in o->ptr; any other encoding triggers redisPanic().
 * NOTE(review): strtoll() range errors are not checked in the visible code.
 * The handling of a NULL 'o', the store into *target and the success return
 * are not visible in this chunk. */
3360 static int getLongLongFromObject(robj 
*o
, long long *target
) { 
3367         redisAssert(o
->type 
== REDIS_STRING
); 
3368         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3369             value 
= strtoll(o
->ptr
, &eptr
, 10); 
3370             if (eptr
[0] != '\0') return REDIS_ERR
; 
3371         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3372             value 
= (long)o
->ptr
; 
3374             redisPanic("Unknown string encoding"); 
/* Like getLongLongFromObject(), but on failure an error is queued to client
 * 'c': either "-ERR <msg>" built from 'msg', or the default
 * "-ERR value is not an integer".
 * NOTE(review): the condition choosing between the two messages (presumably
 * msg != NULL), the store into *target and the return values are not visible
 * in this chunk. */
3382 static int getLongLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long long *target
, const char *msg
) { 
3384     if (getLongLongFromObject(o
, &value
) != REDIS_OK
) { 
3386             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3388             addReplySds(c
, sdsnew("-ERR value is not an integer\r\n")); 
/* Extract a long from 'o', replying to client 'c' on failure.
 * The value is first obtained with getLongLongFromObjectOrReply() (which
 * replies by itself and makes this function return REDIS_ERR); it is then
 * range checked against [LONG_MIN, LONG_MAX], replying with "-ERR <msg>" or
 * with the default "-ERR value is out of range".
 * NOTE(review): the condition choosing between the two range messages, the
 * store into *target and the remaining returns are not visible in this
 * chunk. */
3397 static int getLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long *target
, const char *msg
) { 
3400     if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
; 
3401     if (value 
< LONG_MIN 
|| value 
> LONG_MAX
) { 
3403             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3405             addReplySds(c
, sdsnew("-ERR value is out of range\r\n")); 
3414 /* =========================== Keyspace access API ========================== */ 
/* Look up 'key' in the main dictionary of 'db' (the dict is indexed by
 * key->ptr) and work on the value stored in the entry.
 *
 * With VM enabled:
 *  - a value in memory or being swapped out has a pending swap-out job
 *    cancelled and its lru field refreshed from server.lruclock;
 *  - otherwise the value must be a REDIS_VMPOINTER: it is loaded with
 *    vmLoadObject(), stored back into the dict entry, and clients blocked on
 *    the key are notified when the value was in REDIS_VM_LOADING state.
 * NOTE(review): the not-found path and the return statements are not visible
 * in this chunk. */
3416 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
3417     dictEntry 
*de 
= dictFind(db
->dict
,key
->ptr
); 
3419         robj 
*val 
= dictGetEntryVal(de
); 
3421         if (server
.vm_enabled
) { 
3422             if (val
->storage 
== REDIS_VM_MEMORY 
|| 
3423                 val
->storage 
== REDIS_VM_SWAPPING
) 
3425                 /* If we were swapping the object out, cancel the operation */ 
3426                 if (val
->storage 
== REDIS_VM_SWAPPING
) 
3427                     vmCancelThreadedIOJob(val
); 
3428                 /* Update the access time for the aging algorithm. */ 
3429                 val
->lru 
= server
.lruclock
; 
3431                 int notify 
= (val
->storage 
== REDIS_VM_LOADING
); 
3433                 /* Our value was swapped on disk. Bring it at home. */ 
3434                 redisAssert(val
->type 
== REDIS_VMPOINTER
); 
3435                 val 
= vmLoadObject(val
); 
3436                 dictGetEntryVal(de
) = val
; 
3438                 /* Clients blocked by the VM subsystem may be waiting for 
                 * this key: notify them when the value was being loaded. */ 
3440                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
3449 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
3450     expireIfNeeded(db
,key
); 
3451     return lookupKey(db
,key
); 
3454 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
3455     deleteIfVolatile(db
,key
); 
3456     touchWatchedKey(db
,key
); 
3457     return lookupKey(db
,key
); 
3460 static robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3461     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
3462     if (!o
) addReply(c
,reply
); 
3466 static robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3467     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
3468     if (!o
) addReply(c
,reply
); 
3472 /* Add the key to the DB. If the key already exists REDIS_ERR is returned, 
3473  * otherwise REDIS_OK is returned, and the caller should increment the 
3474  * refcount of 'val'. */ 
3475 static int dbAdd(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
3476     /* Perform a lookup before adding the key, as we need to copy the 
3478     if (dictFind(db
->dict
, key
->ptr
) != NULL
) { 
3481         sds copy 
= sdsdup(key
->ptr
); 
3482         dictAdd(db
->dict
, copy
, val
); 
3487 /* If the key does not exist, this is just like dbAdd(). Otherwise 
3488  * the value associated to the key is replaced with the new one. 
3490  * On update (key already existed) 0 is returned. Otherwise 1. */ 
3491 static int dbReplace(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
3492     if (dictFind(db
->dict
,key
->ptr
) == NULL
) { 
3493         sds copy 
= sdsdup(key
->ptr
); 
3494         dictAdd(db
->dict
, copy
, val
); 
3497         dictReplace(db
->dict
, key
->ptr
, val
); 
3502 static int dbExists(redisDb 
*db
, robj 
*key
) { 
3503     return dictFind(db
->dict
,key
->ptr
) != NULL
; 
3506 /* Return a random key, in form of a Redis object. 
3507  * If there are no keys, NULL is returned. 
3509  * The function makes sure to return keys not already expired. */ 
3510 static robj 
*dbRandomKey(redisDb 
*db
) { 
3511     struct dictEntry 
*de
; 
3517         de 
= dictGetRandomKey(db
->dict
); 
3518         if (de 
== NULL
) return NULL
; 
3520         key 
= dictGetEntryKey(de
); 
3521         keyobj 
= createStringObject(key
,sdslen(key
)); 
3522         if (dictFind(db
->expires
,key
)) { 
3523             if (expireIfNeeded(db
,keyobj
)) { 
3524                 decrRefCount(keyobj
); 
3525                 continue; /* search for another key. This expired. */ 
3532 /* Delete a key, value, and associated expiration entry if any, from the DB */ 
3533 static int dbDelete(redisDb 
*db
, robj 
*key
) { 
3536     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
->ptr
); 
3537     retval 
= dictDelete(db
->dict
,key
->ptr
); 
3539     return retval 
== DICT_OK
; 
3542 /*============================ RDB saving/loading =========================== */ 
/* Write the one byte 'type' opcode to 'fp'.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,sizeof(type),1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write 't' to 'fp' as a 32 bit signed integer in host byte order.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t) t;

    if (fwrite(&seconds,sizeof(seconds),1,fp) == 0)
        return -1;
    return 0;
}
3555 /* check rdbLoadLen() comments for more info */ 
3556 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
3557     unsigned char buf
[2]; 
3560         /* Save a 6 bit len */ 
3561         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
3562         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3563     } else if (len 
< (1<<14)) { 
3564         /* Save a 14 bit len */ 
3565         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
3567         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
3569         /* Save a 32 bit len */ 
3570         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
3571         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3573         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
3578 /* Encode 'value' as an integer if possible (if integer will fit the 
3579  * supported range). If the function sucessful encoded the integer 
3580  * then the (up to 5 bytes) encoded representation is written in the 
3581  * string pointed by 'enc' and the length is returned. Otherwise 
3583 static int rdbEncodeInteger(long long value
, unsigned char *enc
) { 
3584     /* Finally check if it fits in our ranges */ 
3585     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
3586         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
3587         enc
[1] = value
&0xFF; 
3589     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
3590         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
3591         enc
[1] = value
&0xFF; 
3592         enc
[2] = (value
>>8)&0xFF; 
3594     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
3595         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
3596         enc
[1] = value
&0xFF; 
3597         enc
[2] = (value
>>8)&0xFF; 
3598         enc
[3] = (value
>>16)&0xFF; 
3599         enc
[4] = (value
>>24)&0xFF; 
/* String objects in the form "2391" "-100" without any space and with a
 * range of values that can fit in an 8, 16 or 32 bit signed value can be
 * encoded as integers to save space.
 *
 * 's' points to 'len' bytes of string data. The bytes are copied into a
 * local NUL terminated buffer before parsing, so the function never
 * reads past 'len' even when the caller passes a pointer+length pair
 * that is not NUL terminated (as can be the case for ziplist and zipmap
 * entries -- TODO confirm against those encodings).
 *
 * Returns the number of bytes written to 'enc', or 0 if the string can
 * not be encoded as an integer. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    long long value;
    char *endptr, buf[32], copy[32];

    /* The decimal form of a long long is much shorter than the buffer:
     * anything that does not fit can not round-trip as an integer. */
    if (len == 0 || len >= sizeof(copy)) return 0;
    memcpy(copy,s,len);
    copy[len] = '\0';

    /* Check if it's possible to encode this value as a number */
    value = strtoll(copy, &endptr, 10);
    if (endptr[0] != '\0') return 0;
    ll2string(buf,32,value);

    /* If the number converted back into a string is not identical
     * then it's not possible to encode the string as integer */
    if (strlen(buf) != len || memcmp(buf,s,len)) return 0;

    return rdbEncodeInteger(value,enc);
}
3625 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) { 
3626     size_t comprlen
, outlen
; 
3630     /* We require at least four bytes compression for this to be worth it */ 
3631     if (len 
<= 4) return 0; 
3633     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
3634     comprlen 
= lzf_compress(s
, len
, out
, outlen
); 
3635     if (comprlen 
== 0) { 
3639     /* Data compressed! Let's save it on disk */ 
3640     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
3641     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
3642     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
3643     if (rdbSaveLen(fp
,len
) == -1) goto writeerr
; 
3644     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3653 /* Save a string objet as [len][data] on disk. If the object is a string 
3654  * representation of an integer value we try to safe it in a special form */ 
3655 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) { 
3658     /* Try integer encoding */ 
3660         unsigned char buf
[5]; 
3661         if ((enclen 
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) { 
3662             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3667     /* Try LZF compression - under 20 bytes it's unable to compress even 
3668      * aaaaaaaaaaaaaaaaaa so skip it */ 
3669     if (server
.rdbcompression 
&& len 
> 20) { 
3672         retval 
= rdbSaveLzfStringObject(fp
,s
,len
); 
3673         if (retval 
== -1) return -1; 
3674         if (retval 
> 0) return 0; 
3675         /* retval == 0 means data can't be compressed, save the old way */ 
3678     /* Store verbatim */ 
3679     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3680     if (len 
&& fwrite(s
,len
,1,fp
) == 0) return -1; 
/* Save a long long value as either an encoded string or a string.
 * The compact integer encoding is used when rdbEncodeInteger() supports
 * the value, otherwise the decimal representation is saved as a length
 * prefixed string. Returns 0 on success, -1 on write error. */
static int rdbSaveLongLongAsStringObject(FILE *fp, long long value) {
    unsigned char buf[32];
    int enclen = rdbEncodeInteger(value,buf);

    if (enclen <= 0) {
        /* Encode as string */
        enclen = ll2string((char*)buf,32,value);
        redisAssert(enclen < 32);
        if (rdbSaveLen(fp,enclen) == -1) return -1;
    }
    /* In both cases 'buf' now holds 'enclen' bytes to write. */
    return (fwrite(buf,enclen,1,fp) == 0) ? -1 : 0;
}
3700 /* Like rdbSaveStringObjectRaw() but handle encoded objects */ 
3701 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3702     /* Avoid to decode the object, then encode it again, if the 
3703      * object is alrady integer encoded. */ 
3704     if (obj
->encoding 
== REDIS_ENCODING_INT
) { 
3705         return rdbSaveLongLongAsStringObject(fp
,(long)obj
->ptr
); 
3707         redisAssert(obj
->encoding 
== REDIS_ENCODING_RAW
); 
3708         return rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
        /* Check if the float is in a safe range to be casted into a
         * long long. We are assuming that long long is 64 bit here.
         * Also we are assuming that there are no implementations around where
         * double has precision < 52 bit.
         *
         * Under this assumptions we test if a double is inside an interval
         * where casting to long long is safe. Then using two castings we
         * make sure the decimal part is zero. If all this is true we use
         * integer printing function that is much faster. */
        double min = -4503599627370495; /* -((2^52)-1) */
        double max = 4503599627370496; /* 2^52 */
        if (val > min && val < max && val == ((double)((long long)val)))
            /* The string starts at buf+1, so one byte less is available. */
            ll2string((char*)buf+1,sizeof(buf)-1,(long long)val);
        else
#endif
            snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3755 /* Save a Redis object. */ 
3756 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3757     if (o
->type 
== REDIS_STRING
) { 
3758         /* Save a string value */ 
3759         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3760     } else if (o
->type 
== REDIS_LIST
) { 
3761         /* Save a list value */ 
3762         if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
3764             unsigned char *vstr
; 
3768             if (rdbSaveLen(fp
,ziplistLen(o
->ptr
)) == -1) return -1; 
3769             p 
= ziplistIndex(o
->ptr
,0); 
3770             while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
3772                     if (rdbSaveRawString(fp
,vstr
,vlen
) == -1) 
3775                     if (rdbSaveLongLongAsStringObject(fp
,vlong
) == -1) 
3778                 p 
= ziplistNext(o
->ptr
,p
); 
3780         } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
3781             list 
*list 
= o
->ptr
; 
3785             if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3786             listRewind(list
,&li
); 
3787             while((ln 
= listNext(&li
))) { 
3788                 robj 
*eleobj 
= listNodeValue(ln
); 
3789                 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3792             redisPanic("Unknown list encoding"); 
3794     } else if (o
->type 
== REDIS_SET
) { 
3795         /* Save a set value */ 
3797         dictIterator 
*di 
= dictGetIterator(set
); 
3800         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3801         while((de 
= dictNext(di
)) != NULL
) { 
3802             robj 
*eleobj 
= dictGetEntryKey(de
); 
3804             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3806         dictReleaseIterator(di
); 
3807     } else if (o
->type 
== REDIS_ZSET
) { 
3808         /* Save a set value */ 
3810         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3813         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3814         while((de 
= dictNext(di
)) != NULL
) { 
3815             robj 
*eleobj 
= dictGetEntryKey(de
); 
3816             double *score 
= dictGetEntryVal(de
); 
3818             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3819             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3821         dictReleaseIterator(di
); 
3822     } else if (o
->type 
== REDIS_HASH
) { 
3823         /* Save a hash value */ 
3824         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3825             unsigned char *p 
= zipmapRewind(o
->ptr
); 
3826             unsigned int count 
= zipmapLen(o
->ptr
); 
3827             unsigned char *key
, *val
; 
3828             unsigned int klen
, vlen
; 
3830             if (rdbSaveLen(fp
,count
) == -1) return -1; 
3831             while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
3832                 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1; 
3833                 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1; 
3836             dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
3839             if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1; 
3840             while((de 
= dictNext(di
)) != NULL
) { 
3841                 robj 
*key 
= dictGetEntryKey(de
); 
3842                 robj 
*val 
= dictGetEntryVal(de
); 
3844                 if (rdbSaveStringObject(fp
,key
) == -1) return -1; 
3845                 if (rdbSaveStringObject(fp
,val
) == -1) return -1; 
3847             dictReleaseIterator(di
); 
3850         redisPanic("Unknown object type"); 
3855 /* Return the length the object will have on disk if saved with 
3856  * the rdbSaveObject() function. Currently we use a trick to get 
3857  * this length with very little changes to the code. In the future 
3858  * we could switch to a faster solution. */ 
3859 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3860     if (fp 
== NULL
) fp 
= server
.devnull
; 
3862     assert(rdbSaveObject(fp
,o
) != 1); 
3866 /* Return the number of pages required to save this object in the swap file */ 
3867 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3868     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3870     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3873 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3874 static int rdbSave(char *filename
) { 
3875     dictIterator 
*di 
= NULL
; 
3880     time_t now 
= time(NULL
); 
3882     /* Wait for I/O therads to terminate, just in case this is a 
3883      * foreground-saving, to avoid seeking the swap file descriptor at the 
3885     if (server
.vm_enabled
) 
3886         waitEmptyIOJobsQueue(); 
3888     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3889     fp 
= fopen(tmpfile
,"w"); 
3891         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3894     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3895     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3896         redisDb 
*db 
= server
.db
+j
; 
3898         if (dictSize(d
) == 0) continue; 
3899         di 
= dictGetIterator(d
); 
3905         /* Write the SELECT DB opcode */ 
3906         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3907         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3909         /* Iterate this DB writing every entry */ 
3910         while((de 
= dictNext(di
)) != NULL
) { 
3911             sds keystr 
= dictGetEntryKey(de
); 
3912             robj key
, *o 
= dictGetEntryVal(de
); 
3915             initStaticStringObject(key
,keystr
); 
3916             expiretime 
= getExpire(db
,&key
); 
3918             /* Save the expire time */ 
3919             if (expiretime 
!= -1) { 
3920                 /* If this key is already expired skip it */ 
3921                 if (expiretime 
< now
) continue; 
3922                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3923                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3925             /* Save the key and associated value. This requires special 
3926              * handling if the value is swapped out. */ 
3927             if (!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY 
|| 
3928                                       o
->storage 
== REDIS_VM_SWAPPING
) { 
3929                 /* Save type, key, value */ 
3930                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3931                 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
; 
3932                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3934                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3936                 /* Get a preview of the object in memory */ 
3937                 po 
= vmPreviewObject(o
); 
3938                 /* Save type, key, value */ 
3939                 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
; 
3940                 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
; 
3941                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3942                 /* Remove the loaded object from memory */ 
3946         dictReleaseIterator(di
); 
3949     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3951     /* Make sure data will not remain on the OS's output buffers */ 
3956     /* Use RENAME to make sure the DB file is changed atomically only 
3957      * if the generate DB file is ok. */ 
3958     if (rename(tmpfile
,filename
) == -1) { 
3959         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3963     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3965     server
.lastsave 
= time(NULL
); 
3971     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3972     if (di
) dictReleaseIterator(di
); 
3976 static int rdbSaveBackground(char *filename
) { 
3979     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3980     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3981     if ((childpid 
= fork()) == 0) { 
3983         if (server
.vm_enabled
) vmReopenSwapFile(); 
3985         if (rdbSave(filename
) == REDIS_OK
) { 
3992         if (childpid 
== -1) { 
3993             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3997         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3998         server
.bgsavechildpid 
= childpid
; 
3999         updateDictResizePolicy(); 
4002     return REDIS_OK
; /* unreached */ 
/* Remove the temp file "temp-<pid>.rdb" (relative to the current working
 * directory) used by the saving process with pid 'childpid'. Errors from
 * unlink() are ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];
    int pid = (int) childpid;

    snprintf(path,sizeof(path),"temp-%d.rdb", pid);
    unlink(path);
}
/* Read the next one byte type opcode from 'fp'.
 * Returns the byte value (0-255), or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char opcode;
    size_t nread = fread(&opcode,sizeof(opcode),1,fp);

    return (nread == 0) ? -1 : opcode;
}
/* Read a 32 bit signed time value (host byte order, as written by
 * rdbSaveTime()) from 'fp'. Returns the time, or -1 on short read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;

    if (fread(&seconds,sizeof(seconds),1,fp) == 0)
        return -1;
    return (time_t) seconds;
}
4024 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
4025  * of this file for a description of how this are stored on disk. 
4027  * isencoded is set to 1 if the readed length is not actually a length but 
4028  * an "encoding type", check the above comments for more info */ 
4029 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
4030     unsigned char buf
[2]; 
4034     if (isencoded
) *isencoded 
= 0; 
4035     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4036     type 
= (buf
[0]&0xC0)>>6; 
4037     if (type 
== REDIS_RDB_6BITLEN
) { 
4038         /* Read a 6 bit len */ 
4040     } else if (type 
== REDIS_RDB_ENCVAL
) { 
4041         /* Read a 6 bit len encoding type */ 
4042         if (isencoded
) *isencoded 
= 1; 
4044     } else if (type 
== REDIS_RDB_14BITLEN
) { 
4045         /* Read a 14 bit len */ 
4046         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4047         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
4049         /* Read a 32 bit len */ 
4050         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4055 /* Load an integer-encoded object from file 'fp', with the specified 
4056  * encoding type 'enctype'. If encode is true the function may return 
4057  * an integer-encoded object as reply, otherwise the returned object 
4058  * will always be encoded as a raw string. */ 
4059 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) { 
4060     unsigned char enc
[4]; 
4063     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
4064         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
4065         val 
= (signed char)enc
[0]; 
4066     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
4068         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
4069         v 
= enc
[0]|(enc
[1]<<8); 
4071     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
4073         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
4074         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
4077         val 
= 0; /* anti-warning */ 
4078         redisPanic("Unknown RDB integer encoding type"); 
4081         return createStringObjectFromLongLong(val
); 
4083         return createObject(REDIS_STRING
,sdsfromlonglong(val
)); 
4086 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
4087     unsigned int len
, clen
; 
4088     unsigned char *c 
= NULL
; 
4091     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4092     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4093     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
4094     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
4095     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
4096     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
4098     return createObject(REDIS_STRING
,val
); 
4105 static robj 
*rdbGenericLoadStringObject(FILE*fp
, int encode
) { 
4110     len 
= rdbLoadLen(fp
,&isencoded
); 
4113         case REDIS_RDB_ENC_INT8
: 
4114         case REDIS_RDB_ENC_INT16
: 
4115         case REDIS_RDB_ENC_INT32
: 
4116             return rdbLoadIntegerObject(fp
,len
,encode
); 
4117         case REDIS_RDB_ENC_LZF
: 
4118             return rdbLoadLzfStringObject(fp
); 
4120             redisPanic("Unknown RDB encoding type"); 
4124     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
4125     val 
= sdsnewlen(NULL
,len
); 
4126     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
4130     return createObject(REDIS_STRING
,val
); 
4133 static robj 
*rdbLoadStringObject(FILE *fp
) { 
4134     return rdbGenericLoadStringObject(fp
,0); 
4137 static robj 
*rdbLoadEncodedStringObject(FILE *fp
) { 
4138     return rdbGenericLoadStringObject(fp
,1); 
4141 /* For information about double serialization check rdbSaveDoubleValue() */ 
4142 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
4146     if (fread(&len
,1,1,fp
) == 0) return -1; 
4148     case 255: *val 
= R_NegInf
; return 0; 
4149     case 254: *val 
= R_PosInf
; return 0; 
4150     case 253: *val 
= R_Nan
; return 0; 
4152         if (fread(buf
,len
,1,fp
) == 0) return -1; 
4154         sscanf(buf
, "%lg", val
); 
4159 /* Load a Redis object of the specified type from the specified file. 
4160  * On success a newly allocated object is returned, otherwise NULL. */ 
4161 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
4162     robj 
*o
, *ele
, *dec
; 
4165     redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
)); 
4166     if (type 
== REDIS_STRING
) { 
4167         /* Read string value */ 
4168         if ((o 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4169         o 
= tryObjectEncoding(o
); 
4170     } else if (type 
== REDIS_LIST
) { 
4171         /* Read list value */ 
4172         if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4174         /* Use a real list when there are too many entries */ 
4175         if (len 
> server
.list_max_ziplist_entries
) { 
4176             o 
= createListObject(); 
4178             o 
= createZiplistObject(); 
4181         /* Load every single element of the list */ 
4183             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4185             /* If we are using a ziplist and the value is too big, convert 
4186              * the object to a real list. */ 
4187             if (o
->encoding 
== REDIS_ENCODING_ZIPLIST 
&& 
4188                 ele
->encoding 
== REDIS_ENCODING_RAW 
&& 
4189                 sdslen(ele
->ptr
) > server
.list_max_ziplist_value
) 
4190                     listTypeConvert(o
,REDIS_ENCODING_LIST
); 
4192             if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4193                 dec 
= getDecodedObject(ele
); 
4194                 o
->ptr 
= ziplistPush(o
->ptr
,dec
->ptr
,sdslen(dec
->ptr
),REDIS_TAIL
); 
4198                 ele 
= tryObjectEncoding(ele
); 
4199                 listAddNodeTail(o
->ptr
,ele
); 
4202     } else if (type 
== REDIS_SET
) { 
4203         /* Read list/set value */ 
4204         if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4205         o 
= createSetObject(); 
4206         /* It's faster to expand the dict to the right size asap in order 
4207          * to avoid rehashing */ 
4208         if (len 
> DICT_HT_INITIAL_SIZE
) 
4209             dictExpand(o
->ptr
,len
); 
4210         /* Load every single element of the list/set */ 
4212             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4213             ele 
= tryObjectEncoding(ele
); 
4214             dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
4216     } else if (type 
== REDIS_ZSET
) { 
4217         /* Read list/set value */ 
4221         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4222         o 
= createZsetObject(); 
4224         /* Load every single element of the list/set */ 
4227             double *score 
= zmalloc(sizeof(double)); 
4229             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4230             ele 
= tryObjectEncoding(ele
); 
4231             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
4232             dictAdd(zs
->dict
,ele
,score
); 
4233             zslInsert(zs
->zsl
,*score
,ele
); 
4234             incrRefCount(ele
); /* added to skiplist */ 
4236     } else if (type 
== REDIS_HASH
) { 
4239         if ((hashlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4240         o 
= createHashObject(); 
4241         /* Too many entries? Use an hash table. */ 
4242         if (hashlen 
> server
.hash_max_zipmap_entries
) 
4243             convertToRealHash(o
); 
4244         /* Load every key/value, then set it into the zipmap or hash 
4245          * table, as needed. */ 
4249             if ((key 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
4250             if ((val 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
4251             /* If we are using a zipmap and there are too big values 
4252              * the object is converted to real hash table encoding. */ 
4253             if (o
->encoding 
!= REDIS_ENCODING_HT 
&& 
4254                (sdslen(key
->ptr
) > server
.hash_max_zipmap_value 
|| 
4255                 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)) 
4257                     convertToRealHash(o
); 
4260             if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
4261                 unsigned char *zm 
= o
->ptr
; 
4263                 zm 
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
), 
4264                                   val
->ptr
,sdslen(val
->ptr
),NULL
); 
4269                 key 
= tryObjectEncoding(key
); 
4270                 val 
= tryObjectEncoding(val
); 
4271                 dictAdd((dict
*)o
->ptr
,key
,val
); 
4275         redisPanic("Unknown object type"); 
4280 static int rdbLoad(char *filename
) { 
4283     int type
, retval
, rdbver
; 
4284     int swap_all_values 
= 0; 
4285     redisDb 
*db 
= server
.db
+0; 
4287     time_t expiretime
, now 
= time(NULL
); 
4289     fp 
= fopen(filename
,"r"); 
4290     if (!fp
) return REDIS_ERR
; 
4291     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
4293     if (memcmp(buf
,"REDIS",5) != 0) { 
4295         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
4298     rdbver 
= atoi(buf
+5); 
4301         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
4310         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4311         if (type 
== REDIS_EXPIRETIME
) { 
4312             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
4313             /* We read the time so we need to read the object type again */ 
4314             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4316         if (type 
== REDIS_EOF
) break; 
4317         /* Handle SELECT DB opcode as a special case */ 
4318         if (type 
== REDIS_SELECTDB
) { 
4319             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
4321             if (dbid 
>= (unsigned)server
.dbnum
) { 
4322                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
4325             db 
= server
.db
+dbid
; 
4329         if ((key 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
4331         if ((val 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
4332         /* Check if the key already expired */ 
4333         if (expiretime 
!= -1 && expiretime 
< now
) { 
4338         /* Add the new object in the hash table */ 
4339         retval 
= dbAdd(db
,key
,val
); 
4340         if (retval 
== REDIS_ERR
) { 
4341             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
); 
4344         /* Set the expire time if needed */ 
4345         if (expiretime 
!= -1) setExpire(db
,key
,expiretime
); 
4347         /* Handle swapping while loading big datasets when VM is on */ 
4349         /* If we detecter we are hopeless about fitting something in memory 
4350          * we just swap every new key on disk. Directly... 
4351          * Note that's important to check for this condition before resorting 
4352          * to random sampling, otherwise we may try to swap already 
4354         if (swap_all_values
) { 
4355             dictEntry 
*de 
= dictFind(db
->dict
,key
->ptr
); 
4357             /* de may be NULL since the key already expired */ 
4360                 val 
= dictGetEntryVal(de
); 
4362                 if (val
->refcount 
== 1 && 
4363                     (vp 
= vmSwapObjectBlocking(val
)) != NULL
) 
4364                     dictGetEntryVal(de
) = vp
; 
4371         /* Flush data on disk once 32 MB of additional RAM are used... */ 
4373         if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32) 
4376         /* If we have still some hope of having some value fitting memory 
4377          * then we try random sampling. */ 
4378         if (!swap_all_values 
&& server
.vm_enabled 
&& force_swapout
) { 
4379             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
4380                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
4382             if (zmalloc_used_memory() > server
.vm_max_memory
) 
4383                 swap_all_values 
= 1; /* We are already using too much mem */ 
4389 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
4390     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
4392     return REDIS_ERR
; /* Just to avoid warning */ 
4395 /*================================== Shutdown =============================== */ 
4396 static int prepareForShutdown() { 
4397     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4398     /* Kill the saving child if there is a background saving in progress. 
4399        We want to avoid race conditions, for instance our saving child may 
4400        overwrite the synchronous saving did by SHUTDOWN. */ 
4401     if (server
.bgsavechildpid 
!= -1) { 
4402         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4403         kill(server
.bgsavechildpid
,SIGKILL
); 
4404         rdbRemoveTempFile(server
.bgsavechildpid
); 
4406     if (server
.appendonly
) { 
4407         /* Append only file: fsync() the AOF and exit */ 
4408         aof_fsync(server
.appendfd
); 
4409         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4411         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4412         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4413             if (server
.daemonize
) 
4414                 unlink(server
.pidfile
); 
4415             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4417             /* Ooops.. error saving! The best we can do is to continue 
4418              * operating. Note that if there was a background saving process, 
4419              * in the next cron() Redis will be notified that the background 
4420              * saving aborted, handling special stuff like slaves pending for 
4421              * synchronization... */ 
4422             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit"); 
4426     redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4430 /*================================== Commands =============================== */ 
4432 static void authCommand(redisClient 
*c
) { 
4433     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
4434       c
->authenticated 
= 1; 
4435       addReply(c
,shared
.ok
); 
4437       c
->authenticated 
= 0; 
4438       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
4442 static void pingCommand(redisClient 
*c
) { 
4443     addReply(c
,shared
.pong
); 
4446 static void echoCommand(redisClient 
*c
) { 
4447     addReplyBulk(c
,c
->argv
[1]); 
4450 /*=================================== Strings =============================== */ 
4452 static void setGenericCommand(redisClient 
*c
, int nx
, robj 
*key
, robj 
*val
, robj 
*expire
) { 
4454     long seconds 
= 0; /* initialized to avoid an harmness warning */ 
4457         if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
) 
4460             addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n")); 
4465     touchWatchedKey(c
->db
,key
); 
4466     if (nx
) deleteIfVolatile(c
->db
,key
); 
4467     retval 
= dbAdd(c
->db
,key
,val
); 
4468     if (retval 
== REDIS_ERR
) { 
4470             dbReplace(c
->db
,key
,val
); 
4473             addReply(c
,shared
.czero
); 
4480     removeExpire(c
->db
,key
); 
4481     if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
); 
4482     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4485 static void setCommand(redisClient 
*c
) { 
4486     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
); 
4489 static void setnxCommand(redisClient 
*c
) { 
4490     setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
); 
4493 static void setexCommand(redisClient 
*c
) { 
4494     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]); 
4497 static int getGenericCommand(redisClient 
*c
) { 
4500     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
) 
4503     if (o
->type 
!= REDIS_STRING
) { 
4504         addReply(c
,shared
.wrongtypeerr
); 
4512 static void getCommand(redisClient 
*c
) { 
4513     getGenericCommand(c
); 
4516 static void getsetCommand(redisClient 
*c
) { 
4517     if (getGenericCommand(c
) == REDIS_ERR
) return; 
4518     dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]); 
4519     incrRefCount(c
->argv
[2]); 
4521     removeExpire(c
->db
,c
->argv
[1]); 
4524 static void mgetCommand(redisClient 
*c
) { 
4527     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
4528     for (j 
= 1; j 
< c
->argc
; j
++) { 
4529         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
4531             addReply(c
,shared
.nullbulk
); 
4533             if (o
->type 
!= REDIS_STRING
) { 
4534                 addReply(c
,shared
.nullbulk
); 
4542 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
4543     int j
, busykeys 
= 0; 
4545     if ((c
->argc 
% 2) == 0) { 
4546         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
4549     /* Handle the NX flag. The MSETNX semantic is to return zero and don't 
4550      * set nothing at all if at least one already key exists. */ 
4552         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4553             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
4559         addReply(c
, shared
.czero
); 
4563     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4564         c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]); 
4565         dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]); 
4566         incrRefCount(c
->argv
[j
+1]); 
4567         removeExpire(c
->db
,c
->argv
[j
]); 
4569     server
.dirty 
+= (c
->argc
-1)/2; 
4570     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4573 static void msetCommand(redisClient 
*c
) { 
4574     msetGenericCommand(c
,0); 
4577 static void msetnxCommand(redisClient 
*c
) { 
4578     msetGenericCommand(c
,1); 
4581 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
4585     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4586     if (o 
!= NULL 
&& checkType(c
,o
,REDIS_STRING
)) return; 
4587     if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return; 
4590     o 
= createStringObjectFromLongLong(value
); 
4591     dbReplace(c
->db
,c
->argv
[1],o
); 
4593     addReply(c
,shared
.colon
); 
4595     addReply(c
,shared
.crlf
); 
4598 static void incrCommand(redisClient 
*c
) { 
4599     incrDecrCommand(c
,1); 
4602 static void decrCommand(redisClient 
*c
) { 
4603     incrDecrCommand(c
,-1); 
4606 static void incrbyCommand(redisClient 
*c
) { 
4609     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4610     incrDecrCommand(c
,incr
); 
4613 static void decrbyCommand(redisClient 
*c
) { 
4616     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4617     incrDecrCommand(c
,-incr
); 
4620 static void appendCommand(redisClient 
*c
) { 
4625     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4627         /* Create the key */ 
4628         retval 
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]); 
4629         incrRefCount(c
->argv
[2]); 
4630         totlen 
= stringObjectLen(c
->argv
[2]); 
4632         if (o
->type 
!= REDIS_STRING
) { 
4633             addReply(c
,shared
.wrongtypeerr
); 
4636         /* If the object is specially encoded or shared we have to make 
4638         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
4639             robj 
*decoded 
= getDecodedObject(o
); 
4641             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
4642             decrRefCount(decoded
); 
4643             dbReplace(c
->db
,c
->argv
[1],o
); 
4646         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
4647             o
->ptr 
= sdscatlen(o
->ptr
, 
4648                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
4650             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
4651                 (unsigned long) c
->argv
[2]->ptr
); 
4653         totlen 
= sdslen(o
->ptr
); 
4656     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
4659 static void substrCommand(redisClient 
*c
) { 
4661     long start 
= atoi(c
->argv
[2]->ptr
); 
4662     long end 
= atoi(c
->argv
[3]->ptr
); 
4663     size_t rangelen
, strlen
; 
4666     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4667         checkType(c
,o
,REDIS_STRING
)) return; 
4669     o 
= getDecodedObject(o
); 
4670     strlen 
= sdslen(o
->ptr
); 
4672     /* convert negative indexes */ 
4673     if (start 
< 0) start 
= strlen
+start
; 
4674     if (end 
< 0) end 
= strlen
+end
; 
4675     if (start 
< 0) start 
= 0; 
4676     if (end 
< 0) end 
= 0; 
4678     /* indexes sanity checks */ 
4679     if (start 
> end 
|| (size_t)start 
>= strlen
) { 
4680         /* Out of range start or start > end result in null reply */ 
4681         addReply(c
,shared
.nullbulk
); 
4685     if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
4686     rangelen 
= (end
-start
)+1; 
4688     /* Return the result */ 
4689     addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
)); 
4690     range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
4691     addReplySds(c
,range
); 
4692     addReply(c
,shared
.crlf
); 
4696 /* ========================= Type agnostic commands ========================= */ 
4698 static void delCommand(redisClient 
*c
) { 
4701     for (j 
= 1; j 
< c
->argc
; j
++) { 
4702         if (dbDelete(c
->db
,c
->argv
[j
])) { 
4703             touchWatchedKey(c
->db
,c
->argv
[j
]); 
4708     addReplyLongLong(c
,deleted
); 
4711 static void existsCommand(redisClient 
*c
) { 
4712     expireIfNeeded(c
->db
,c
->argv
[1]); 
4713     if (dbExists(c
->db
,c
->argv
[1])) { 
4714         addReply(c
, shared
.cone
); 
4716         addReply(c
, shared
.czero
); 
4720 static void selectCommand(redisClient 
*c
) { 
4721     int id 
= atoi(c
->argv
[1]->ptr
); 
4723     if (selectDb(c
,id
) == REDIS_ERR
) { 
4724         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
4726         addReply(c
,shared
.ok
); 
4730 static void randomkeyCommand(redisClient 
*c
) { 
4733     if ((key 
= dbRandomKey(c
->db
)) == NULL
) { 
4734         addReply(c
,shared
.nullbulk
); 
4738     addReplyBulk(c
,key
); 
4742 static void keysCommand(redisClient 
*c
) { 
4745     sds pattern 
= c
->argv
[1]->ptr
; 
4746     int plen 
= sdslen(pattern
); 
4747     unsigned long numkeys 
= 0; 
4748     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
4750     di 
= dictGetIterator(c
->db
->dict
); 
4752     decrRefCount(lenobj
); 
4753     while((de 
= dictNext(di
)) != NULL
) { 
4754         sds key 
= dictGetEntryKey(de
); 
4757         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
4758             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
4759             keyobj 
= createStringObject(key
,sdslen(key
)); 
4760             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
4761                 addReplyBulk(c
,keyobj
); 
4764             decrRefCount(keyobj
); 
4767     dictReleaseIterator(di
); 
4768     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
4771 static void dbsizeCommand(redisClient 
*c
) { 
4773         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
4776 static void lastsaveCommand(redisClient 
*c
) { 
4778         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
4781 static void typeCommand(redisClient 
*c
) { 
4785     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4790         case REDIS_STRING
: type 
= "+string"; break; 
4791         case REDIS_LIST
: type 
= "+list"; break; 
4792         case REDIS_SET
: type 
= "+set"; break; 
4793         case REDIS_ZSET
: type 
= "+zset"; break; 
4794         case REDIS_HASH
: type 
= "+hash"; break; 
4795         default: type 
= "+unknown"; break; 
4798     addReplySds(c
,sdsnew(type
)); 
4799     addReply(c
,shared
.crlf
); 
4802 static void saveCommand(redisClient 
*c
) { 
4803     if (server
.bgsavechildpid 
!= -1) { 
4804         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
4807     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4808         addReply(c
,shared
.ok
); 
4810         addReply(c
,shared
.err
); 
4814 static void bgsaveCommand(redisClient 
*c
) { 
4815     if (server
.bgsavechildpid 
!= -1) { 
4816         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4819     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4820         char *status 
= "+Background saving started\r\n"; 
4821         addReplySds(c
,sdsnew(status
)); 
4823         addReply(c
,shared
.err
); 
4827 static void shutdownCommand(redisClient 
*c
) { 
4828     if (prepareForShutdown() == REDIS_OK
) 
4830     addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n")); 
4833 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4836     /* To use the same key as src and dst is probably an error */ 
4837     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4838         addReply(c
,shared
.sameobjecterr
); 
4842     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
4846     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4847     if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) { 
4850             addReply(c
,shared
.czero
); 
4853         dbReplace(c
->db
,c
->argv
[2],o
); 
4855     dbDelete(c
->db
,c
->argv
[1]); 
4856     touchWatchedKey(c
->db
,c
->argv
[2]); 
4858     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
4861 static void renameCommand(redisClient 
*c
) { 
4862     renameGenericCommand(c
,0); 
4865 static void renamenxCommand(redisClient 
*c
) { 
4866     renameGenericCommand(c
,1); 
4869 static void moveCommand(redisClient 
*c
) { 
4874     /* Obtain source and target DB pointers */ 
4877     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4878         addReply(c
,shared
.outofrangeerr
); 
4882     selectDb(c
,srcid
); /* Back to the source DB */ 
4884     /* If the user is moving using as target the same 
4885      * DB as the source DB it is probably an error. */ 
4887         addReply(c
,shared
.sameobjecterr
); 
4891     /* Check if the element exists and get a reference */ 
4892     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4894         addReply(c
,shared
.czero
); 
4898     /* Try to add the element to the target DB */ 
4899     deleteIfVolatile(dst
,c
->argv
[1]); 
4900     if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) { 
4901         addReply(c
,shared
.czero
); 
4906     /* OK! key moved, free the entry in the source DB */ 
4907     dbDelete(src
,c
->argv
[1]); 
4909     addReply(c
,shared
.cone
); 
4912 /* =================================== Lists ================================ */ 
4915 /* Check the argument length to see if it requires us to convert the ziplist 
4916  * to a real list. Only check raw-encoded objects because integer encoded 
4917  * objects are never too long. */ 
4918 static void listTypeTryConversion(robj 
*subject
, robj 
*value
) { 
4919     if (subject
->encoding 
!= REDIS_ENCODING_ZIPLIST
) return; 
4920     if (value
->encoding 
== REDIS_ENCODING_RAW 
&& 
4921         sdslen(value
->ptr
) > server
.list_max_ziplist_value
) 
4922             listTypeConvert(subject
,REDIS_ENCODING_LIST
); 
4925 static void listTypePush(robj 
*subject
, robj 
*value
, int where
) { 
4926     /* Check if we need to convert the ziplist */ 
4927     listTypeTryConversion(subject
,value
); 
4928     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST 
&& 
4929         ziplistLen(subject
->ptr
) > server
.list_max_ziplist_entries
) 
4930             listTypeConvert(subject
,REDIS_ENCODING_LIST
); 
4932     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4933         int pos 
= (where 
== REDIS_HEAD
) ? ZIPLIST_HEAD 
: ZIPLIST_TAIL
; 
4934         value 
= getDecodedObject(value
); 
4935         subject
->ptr 
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),pos
); 
4936         decrRefCount(value
); 
4937     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4938         if (where 
== REDIS_HEAD
) { 
4939             listAddNodeHead(subject
->ptr
,value
); 
4941             listAddNodeTail(subject
->ptr
,value
); 
4943         incrRefCount(value
); 
4945         redisPanic("Unknown list encoding"); 
4949 static robj 
*listTypePop(robj 
*subject
, int where
) { 
4951     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4953         unsigned char *vstr
; 
4956         int pos 
= (where 
== REDIS_HEAD
) ? 0 : -1; 
4957         p 
= ziplistIndex(subject
->ptr
,pos
); 
4958         if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
4960                 value 
= createStringObject((char*)vstr
,vlen
); 
4962                 value 
= createStringObjectFromLongLong(vlong
); 
4964             /* We only need to delete an element when it exists */ 
4965             subject
->ptr 
= ziplistDelete(subject
->ptr
,&p
); 
4967     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4968         list 
*list 
= subject
->ptr
; 
4970         if (where 
== REDIS_HEAD
) { 
4971             ln 
= listFirst(list
); 
4973             ln 
= listLast(list
); 
4976             value 
= listNodeValue(ln
); 
4977             incrRefCount(value
); 
4978             listDelNode(list
,ln
); 
4981         redisPanic("Unknown list encoding"); 
4986 static unsigned long listTypeLength(robj 
*subject
) { 
4987     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4988         return ziplistLen(subject
->ptr
); 
4989     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4990         return listLength((list
*)subject
->ptr
); 
4992         redisPanic("Unknown list encoding"); 
4996 /* Structure to hold set iteration abstraction. */ 
4999     unsigned char encoding
; 
5000     unsigned char direction
; /* Iteration direction */ 
5005 /* Structure for an entry while iterating over a list. */ 
5007     listTypeIterator 
*li
; 
5008     unsigned char *zi
;  /* Entry in ziplist */ 
5009     listNode 
*ln
;       /* Entry in linked list */ 
5012 /* Initialize an iterator at the specified index. */ 
5013 static listTypeIterator 
*listTypeInitIterator(robj 
*subject
, int index
, unsigned char direction
) { 
5014     listTypeIterator 
*li 
= zmalloc(sizeof(listTypeIterator
)); 
5015     li
->subject 
= subject
; 
5016     li
->encoding 
= subject
->encoding
; 
5017     li
->direction 
= direction
; 
5018     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5019         li
->zi 
= ziplistIndex(subject
->ptr
,index
); 
5020     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5021         li
->ln 
= listIndex(subject
->ptr
,index
); 
5023         redisPanic("Unknown list encoding"); 
5028 /* Clean up the iterator. */ 
5029 static void listTypeReleaseIterator(listTypeIterator 
*li
) { 
5033 /* Stores pointer to current the entry in the provided entry structure 
5034  * and advances the position of the iterator. Returns 1 when the current 
5035  * entry is in fact an entry, 0 otherwise. */ 
5036 static int listTypeNext(listTypeIterator 
*li
, listTypeEntry 
*entry
) { 
5037     /* Protect from converting when iterating */ 
5038     redisAssert(li
->subject
->encoding 
== li
->encoding
); 
5041     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5043         if (entry
->zi 
!= NULL
) { 
5044             if (li
->direction 
== REDIS_TAIL
) 
5045                 li
->zi 
= ziplistNext(li
->subject
->ptr
,li
->zi
); 
5047                 li
->zi 
= ziplistPrev(li
->subject
->ptr
,li
->zi
); 
5050     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5052         if (entry
->ln 
!= NULL
) { 
5053             if (li
->direction 
== REDIS_TAIL
) 
5054                 li
->ln 
= li
->ln
->next
; 
5056                 li
->ln 
= li
->ln
->prev
; 
5060         redisPanic("Unknown list encoding"); 
5065 /* Return entry or NULL at the current position of the iterator. */ 
5066 static robj 
*listTypeGet(listTypeEntry 
*entry
) { 
5067     listTypeIterator 
*li 
= entry
->li
; 
5069     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5070         unsigned char *vstr
; 
5073         redisAssert(entry
->zi 
!= NULL
); 
5074         if (ziplistGet(entry
->zi
,&vstr
,&vlen
,&vlong
)) { 
5076                 value 
= createStringObject((char*)vstr
,vlen
); 
5078                 value 
= createStringObjectFromLongLong(vlong
); 
5081     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5082         redisAssert(entry
->ln 
!= NULL
); 
5083         value 
= listNodeValue(entry
->ln
); 
5084         incrRefCount(value
); 
5086         redisPanic("Unknown list encoding"); 
5091 static void listTypeInsert(robj 
*subject
, listTypeEntry 
*old_entry
, robj 
*new_obj
, int where
) { 
5092     listTypeTryConversion(subject
,new_obj
); 
5093     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5094         if (where 
== REDIS_HEAD
) { 
5095             unsigned char *next 
= ziplistNext(subject
->ptr
,old_entry
->zi
); 
5097                 listTypePush(subject
,new_obj
,REDIS_TAIL
); 
5099                 subject
->ptr 
= ziplistInsert(subject
->ptr
,next
,new_obj
->ptr
,sdslen(new_obj
->ptr
)); 
5102             subject
->ptr 
= ziplistInsert(subject
->ptr
,old_entry
->zi
,new_obj
->ptr
,sdslen(new_obj
->ptr
)); 
5104     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
5105         if (where 
== REDIS_HEAD
) { 
5106             listInsertNode(subject
->ptr
,old_entry
->ln
,new_obj
,1); 
5108             listInsertNode(subject
->ptr
,old_entry
->ln
,new_obj
,0); 
5110         incrRefCount(new_obj
); 
5112         redisPanic("Unknown list encoding"); 
5116 /* Compare the given object with the entry at the current position. */ 
5117 static int listTypeEqual(listTypeEntry 
*entry
, robj 
*o
) { 
5118     listTypeIterator 
*li 
= entry
->li
; 
5119     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5120         redisAssert(o
->encoding 
== REDIS_ENCODING_RAW
); 
5121         return ziplistCompare(entry
->zi
,o
->ptr
,sdslen(o
->ptr
)); 
5122     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5123         return equalStringObjects(o
,listNodeValue(entry
->ln
)); 
5125         redisPanic("Unknown list encoding"); 
5129 /* Delete the element pointed to. */ 
5130 static void listTypeDelete(listTypeEntry 
*entry
) { 
5131     listTypeIterator 
*li 
= entry
->li
; 
5132     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5133         unsigned char *p 
= entry
->zi
; 
5134         li
->subject
->ptr 
= ziplistDelete(li
->subject
->ptr
,&p
); 
5136         /* Update position of the iterator depending on the direction */ 
5137         if (li
->direction 
== REDIS_TAIL
) 
5140             li
->zi 
= ziplistPrev(li
->subject
->ptr
,p
); 
5141     } else if (entry
->li
->encoding 
== REDIS_ENCODING_LIST
) { 
5143         if (li
->direction 
== REDIS_TAIL
) 
5144             next 
= entry
->ln
->next
; 
5146             next 
= entry
->ln
->prev
; 
5147         listDelNode(li
->subject
->ptr
,entry
->ln
); 
5150         redisPanic("Unknown list encoding"); 
5154 static void listTypeConvert(robj 
*subject
, int enc
) { 
5155     listTypeIterator 
*li
; 
5156     listTypeEntry entry
; 
5157     redisAssert(subject
->type 
== REDIS_LIST
); 
5159     if (enc 
== REDIS_ENCODING_LIST
) { 
5160         list 
*l 
= listCreate(); 
5161         listSetFreeMethod(l
,decrRefCount
); 
5163         /* listTypeGet returns a robj with incremented refcount */ 
5164         li 
= listTypeInitIterator(subject
,0,REDIS_TAIL
); 
5165         while (listTypeNext(li
,&entry
)) listAddNodeTail(l
,listTypeGet(&entry
)); 
5166         listTypeReleaseIterator(li
); 
5168         subject
->encoding 
= REDIS_ENCODING_LIST
; 
5169         zfree(subject
->ptr
); 
5172         redisPanic("Unsupported list conversion"); 
5176 static void pushGenericCommand(redisClient 
*c
, int where
) { 
5177     robj 
*lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5179         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
5180             addReply(c
,shared
.cone
); 
5183         lobj 
= createZiplistObject(); 
5184         dbAdd(c
->db
,c
->argv
[1],lobj
); 
5186         if (lobj
->type 
!= REDIS_LIST
) { 
5187             addReply(c
,shared
.wrongtypeerr
); 
5190         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
5191             addReply(c
,shared
.cone
); 
5195     listTypePush(lobj
,c
->argv
[2],where
); 
5196     addReplyLongLong(c
,listTypeLength(lobj
)); 
5200 static void lpushCommand(redisClient 
*c
) { 
5201     pushGenericCommand(c
,REDIS_HEAD
); 
5204 static void rpushCommand(redisClient 
*c
) { 
5205     pushGenericCommand(c
,REDIS_TAIL
); 
5208 static void pushxGenericCommand(redisClient 
*c
, int where
, robj 
*old_obj
, robj 
*new_obj
) { 
5210     listTypeIterator 
*iter
; 
5211     listTypeEntry entry
; 
5213     if ((subject 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5214         checkType(c
,subject
,REDIS_LIST
)) return; 
5215     if (handleClientsWaitingListPush(c
,c
->argv
[1],new_obj
)) { 
5216         addReply(c
,shared
.cone
); 
5220     if (old_obj 
!= NULL
) { 
5221         if (where 
== REDIS_HEAD
) { 
5222             iter 
= listTypeInitIterator(subject
,0,REDIS_TAIL
); 
5224             iter 
= listTypeInitIterator(subject
,-1,REDIS_HEAD
); 
5226         while (listTypeNext(iter
,&entry
)) { 
5227             if (listTypeEqual(&entry
,old_obj
)) { 
5228                 listTypeInsert(subject
,&entry
,new_obj
,where
); 
5232         listTypeReleaseIterator(iter
); 
5234         listTypePush(subject
,new_obj
,where
); 
5238     addReplyUlong(c
,listTypeLength(subject
)); 
5241 static void lpushxCommand(redisClient 
*c
) { 
5242     pushxGenericCommand(c
,REDIS_HEAD
,NULL
,c
->argv
[2]); 
5245 static void rpushxCommand(redisClient 
*c
) { 
5246     pushxGenericCommand(c
,REDIS_TAIL
,NULL
,c
->argv
[2]); 
5249 static void linsertCommand(redisClient 
*c
) { 
5250     if (strcasecmp(c
->argv
[2]->ptr
,"after") == 0) { 
5251         pushxGenericCommand(c
,REDIS_HEAD
,c
->argv
[3],c
->argv
[4]); 
5252     } else if (strcasecmp(c
->argv
[2]->ptr
,"before") == 0) { 
5253         pushxGenericCommand(c
,REDIS_TAIL
,c
->argv
[3],c
->argv
[4]); 
5255         addReply(c
,shared
.syntaxerr
); 
5259 static void llenCommand(redisClient 
*c
) { 
5260     robj 
*o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
); 
5261     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5262     addReplyUlong(c
,listTypeLength(o
)); 
5265 static void lindexCommand(redisClient 
*c
) { 
5266     robj 
*o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
); 
5267     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5268     int index 
= atoi(c
->argv
[2]->ptr
); 
5271     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5273         unsigned char *vstr
; 
5276         p 
= ziplistIndex(o
->ptr
,index
); 
5277         if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
5279                 value 
= createStringObject((char*)vstr
,vlen
); 
5281                 value 
= createStringObjectFromLongLong(vlong
); 
5283             addReplyBulk(c
,value
); 
5284             decrRefCount(value
); 
5286             addReply(c
,shared
.nullbulk
); 
5288     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5289         listNode 
*ln 
= listIndex(o
->ptr
,index
); 
5291             value 
= listNodeValue(ln
); 
5292             addReplyBulk(c
,value
); 
5294             addReply(c
,shared
.nullbulk
); 
5297         redisPanic("Unknown list encoding"); 
5301 static void lsetCommand(redisClient 
*c
) { 
5302     robj 
*o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
); 
5303     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5304     int index 
= atoi(c
->argv
[2]->ptr
); 
5305     robj 
*value 
= c
->argv
[3]; 
5307     listTypeTryConversion(o
,value
); 
5308     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5309         unsigned char *p
, *zl 
= o
->ptr
; 
5310         p 
= ziplistIndex(zl
,index
); 
5312             addReply(c
,shared
.outofrangeerr
); 
5314             o
->ptr 
= ziplistDelete(o
->ptr
,&p
); 
5315             value 
= getDecodedObject(value
); 
5316             o
->ptr 
= ziplistInsert(o
->ptr
,p
,value
->ptr
,sdslen(value
->ptr
)); 
5317             decrRefCount(value
); 
5318             addReply(c
,shared
.ok
); 
5321     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5322         listNode 
*ln 
= listIndex(o
->ptr
,index
); 
5324             addReply(c
,shared
.outofrangeerr
); 
5326             decrRefCount((robj
*)listNodeValue(ln
)); 
5327             listNodeValue(ln
) = value
; 
5328             incrRefCount(value
); 
5329             addReply(c
,shared
.ok
); 
5333         redisPanic("Unknown list encoding"); 
5337 static void popGenericCommand(redisClient 
*c
, int where
) { 
5338     robj 
*o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
); 
5339     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5341     robj 
*value 
= listTypePop(o
,where
); 
5342     if (value 
== NULL
) { 
5343         addReply(c
,shared
.nullbulk
); 
5345         addReplyBulk(c
,value
); 
5346         decrRefCount(value
); 
5347         if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5352 static void lpopCommand(redisClient 
*c
) { 
5353     popGenericCommand(c
,REDIS_HEAD
); 
5356 static void rpopCommand(redisClient 
*c
) { 
5357     popGenericCommand(c
,REDIS_TAIL
); 
5360 static void lrangeCommand(redisClient 
*c
) { 
5362     int start 
= atoi(c
->argv
[2]->ptr
); 
5363     int end 
= atoi(c
->argv
[3]->ptr
); 
5366     listTypeEntry entry
; 
5368     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
5369          || checkType(c
,o
,REDIS_LIST
)) return; 
5370     llen 
= listTypeLength(o
); 
5372     /* convert negative indexes */ 
5373     if (start 
< 0) start 
= llen
+start
; 
5374     if (end 
< 0) end 
= llen
+end
; 
5375     if (start 
< 0) start 
= 0; 
5376     if (end 
< 0) end 
= 0; 
5378     /* indexes sanity checks */ 
5379     if (start 
> end 
|| start 
>= llen
) { 
5380         /* Out of range start or start > end result in empty list */ 
5381         addReply(c
,shared
.emptymultibulk
); 
5384     if (end 
>= llen
) end 
= llen
-1; 
5385     rangelen 
= (end
-start
)+1; 
5387     /* Return the result in form of a multi-bulk reply */ 
5388     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
5389     listTypeIterator 
*li 
= listTypeInitIterator(o
,start
,REDIS_TAIL
); 
5390     for (j 
= 0; j 
< rangelen
; j
++) { 
5391         redisAssert(listTypeNext(li
,&entry
)); 
5392         value 
= listTypeGet(&entry
); 
5393         addReplyBulk(c
,value
); 
5394         decrRefCount(value
); 
5396     listTypeReleaseIterator(li
); 
5399 static void ltrimCommand(redisClient 
*c
) { 
5401     int start 
= atoi(c
->argv
[2]->ptr
); 
5402     int end 
= atoi(c
->argv
[3]->ptr
); 
5404     int j
, ltrim
, rtrim
; 
5408     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL 
|| 
5409         checkType(c
,o
,REDIS_LIST
)) return; 
5410     llen 
= listTypeLength(o
); 
5412     /* convert negative indexes */ 
5413     if (start 
< 0) start 
= llen
+start
; 
5414     if (end 
< 0) end 
= llen
+end
; 
5415     if (start 
< 0) start 
= 0; 
5416     if (end 
< 0) end 
= 0; 
5418     /* indexes sanity checks */ 
5419     if (start 
> end 
|| start 
>= llen
) { 
5420         /* Out of range start or start > end result in empty list */ 
5424         if (end 
>= llen
) end 
= llen
-1; 
5429     /* Remove list elements to perform the trim */ 
5430     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5431         o
->ptr 
= ziplistDeleteRange(o
->ptr
,0,ltrim
); 
5432         o
->ptr 
= ziplistDeleteRange(o
->ptr
,-rtrim
,rtrim
); 
5433     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5435         for (j 
= 0; j 
< ltrim
; j
++) { 
5436             ln 
= listFirst(list
); 
5437             listDelNode(list
,ln
); 
5439         for (j 
= 0; j 
< rtrim
; j
++) { 
5440             ln 
= listLast(list
); 
5441             listDelNode(list
,ln
); 
5444         redisPanic("Unknown list encoding"); 
5446     if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5448     addReply(c
,shared
.ok
); 
5451 static void lremCommand(redisClient 
*c
) { 
5452     robj 
*subject
, *obj 
= c
->argv
[3]; 
5453     int toremove 
= atoi(c
->argv
[2]->ptr
); 
5455     listTypeEntry entry
; 
5457     subject 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
); 
5458     if (subject 
== NULL 
|| checkType(c
,subject
,REDIS_LIST
)) return; 
5460     /* Make sure obj is raw when we're dealing with a ziplist */ 
5461     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) 
5462         obj 
= getDecodedObject(obj
); 
5464     listTypeIterator 
*li
; 
5466         toremove 
= -toremove
; 
5467         li 
= listTypeInitIterator(subject
,-1,REDIS_HEAD
); 
5469         li 
= listTypeInitIterator(subject
,0,REDIS_TAIL
); 
5472     while (listTypeNext(li
,&entry
)) { 
5473         if (listTypeEqual(&entry
,obj
)) { 
5474             listTypeDelete(&entry
); 
5477             if (toremove 
&& removed 
== toremove
) break; 
5480     listTypeReleaseIterator(li
); 
5482     /* Clean up raw encoded object */ 
5483     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) 
5486     if (listTypeLength(subject
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5487     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
5490 /* This is the semantic of this command: 
5491  *  RPOPLPUSH srclist dstlist: 
5492  *   IF LLEN(srclist) > 0 
5493  *     element = RPOP srclist 
5494  *     LPUSH dstlist element 
5501  * The idea is to be able to get an element from a list in a reliable way 
5502  * since the element is not just returned but pushed against another list 
5503  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
5505 static void rpoplpushcommand(redisClient 
*c
) { 
5507     if ((sobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5508         checkType(c
,sobj
,REDIS_LIST
)) return; 
5510     if (listTypeLength(sobj
) == 0) { 
5511         addReply(c
,shared
.nullbulk
); 
5513         robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
5514         if (dobj 
&& checkType(c
,dobj
,REDIS_LIST
)) return; 
5515         value 
= listTypePop(sobj
,REDIS_TAIL
); 
5517         /* Add the element to the target list (unless it's directly 
5518          * passed to some BLPOP-ing client) */ 
5519         if (!handleClientsWaitingListPush(c
,c
->argv
[2],value
)) { 
5520             /* Create the list if the key does not exist */ 
5522                 dobj 
= createZiplistObject(); 
5523                 dbAdd(c
->db
,c
->argv
[2],dobj
); 
5525             listTypePush(dobj
,value
,REDIS_HEAD
); 
5528         /* Send the element to the client as reply as well */ 
5529         addReplyBulk(c
,value
); 
5531         /* listTypePop returns an object with its refcount incremented */ 
5532         decrRefCount(value
); 
5534         /* Delete the source list when it is empty */ 
5535         if (listTypeLength(sobj
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5540 /* ==================================== Sets ================================ */ 
5542 static void saddCommand(redisClient 
*c
) { 
5545     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5547         set 
= createSetObject(); 
5548         dbAdd(c
->db
,c
->argv
[1],set
); 
5550         if (set
->type 
!= REDIS_SET
) { 
5551             addReply(c
,shared
.wrongtypeerr
); 
5555     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
5556         incrRefCount(c
->argv
[2]); 
5558         addReply(c
,shared
.cone
); 
5560         addReply(c
,shared
.czero
); 
5564 static void sremCommand(redisClient 
*c
) { 
5567     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5568         checkType(c
,set
,REDIS_SET
)) return; 
5570     if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
5572         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5573         if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5574         addReply(c
,shared
.cone
); 
5576         addReply(c
,shared
.czero
); 
5580 static void smoveCommand(redisClient 
*c
) { 
5581     robj 
*srcset
, *dstset
; 
5583     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5584     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
5586     /* If the source key does not exist return 0, if it's of the wrong type 
5588     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
5589         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
5592     /* Error if the destination key is not a set as well */ 
5593     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
5594         addReply(c
,shared
.wrongtypeerr
); 
5597     /* Remove the element from the source set */ 
5598     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
5599         /* Key not found in the src set! return zero */ 
5600         addReply(c
,shared
.czero
); 
5603     if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset 
!= dstset
) 
5604         dbDelete(c
->db
,c
->argv
[1]); 
5606     /* Add the element to the destination set */ 
5608         dstset 
= createSetObject(); 
5609         dbAdd(c
->db
,c
->argv
[2],dstset
); 
5611     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
5612         incrRefCount(c
->argv
[3]); 
5613     addReply(c
,shared
.cone
); 
5616 static void sismemberCommand(redisClient 
*c
) { 
5619     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5620         checkType(c
,set
,REDIS_SET
)) return; 
5622     if (dictFind(set
->ptr
,c
->argv
[2])) 
5623         addReply(c
,shared
.cone
); 
5625         addReply(c
,shared
.czero
); 
5628 static void scardCommand(redisClient 
*c
) { 
5632     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5633         checkType(c
,o
,REDIS_SET
)) return; 
5636     addReplyUlong(c
,dictSize(s
)); 
5639 static void spopCommand(redisClient 
*c
) { 
5643     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5644         checkType(c
,set
,REDIS_SET
)) return; 
5646     de 
= dictGetRandomKey(set
->ptr
); 
5648         addReply(c
,shared
.nullbulk
); 
5650         robj 
*ele 
= dictGetEntryKey(de
); 
5652         addReplyBulk(c
,ele
); 
5653         dictDelete(set
->ptr
,ele
); 
5654         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5655         if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5660 static void srandmemberCommand(redisClient 
*c
) { 
5664     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5665         checkType(c
,set
,REDIS_SET
)) return; 
5667     de 
= dictGetRandomKey(set
->ptr
); 
5669         addReply(c
,shared
.nullbulk
); 
5671         robj 
*ele 
= dictGetEntryKey(de
); 
5673         addReplyBulk(c
,ele
); 
5677 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
5678     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
5680     return dictSize(*d1
)-dictSize(*d2
); 
5683 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
5684     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5687     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
5688     unsigned long j
, cardinality 
= 0; 
5690     for (j 
= 0; j 
< setsnum
; j
++) { 
5694                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5695                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5699                 if (dbDelete(c
->db
,dstkey
)) 
5701                 addReply(c
,shared
.czero
); 
5703                 addReply(c
,shared
.emptymultibulk
); 
5707         if (setobj
->type 
!= REDIS_SET
) { 
5709             addReply(c
,shared
.wrongtypeerr
); 
5712         dv
[j
] = setobj
->ptr
; 
5714     /* Sort sets from the smallest to largest, this will improve our 
5715      * algorithm's performance */ 
5716     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
5718     /* The first thing we should output is the total number of elements... 
5719      * since this is a multi-bulk write, but at this stage we don't know 
5720      * the intersection set size, so we use a trick, append an empty object 
5721      * to the output list and save the pointer to later modify it with the 
5724         lenobj 
= createObject(REDIS_STRING
,NULL
); 
5726         decrRefCount(lenobj
); 
5728         /* If we have a target key where to store the resulting set 
5729          * create this key with an empty set inside */ 
5730         dstset 
= createSetObject(); 
5733     /* Iterate all the elements of the first (smallest) set, and test 
5734      * the element against all the other sets, if at least one set does 
5735      * not include the element it is discarded */ 
5736     di 
= dictGetIterator(dv
[0]); 
5738     while((de 
= dictNext(di
)) != NULL
) { 
5741         for (j 
= 1; j 
< setsnum
; j
++) 
5742             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
5744             continue; /* at least one set does not contain the member */ 
5745         ele 
= dictGetEntryKey(de
); 
5747             addReplyBulk(c
,ele
); 
5750             dictAdd(dstset
->ptr
,ele
,NULL
); 
5754     dictReleaseIterator(di
); 
5757         /* Store the resulting set into the target, if the intersection 
5758          * is not an empty set. */ 
5759         dbDelete(c
->db
,dstkey
); 
5760         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5761             dbAdd(c
->db
,dstkey
,dstset
); 
5762             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5764             decrRefCount(dstset
); 
5765             addReply(c
,shared
.czero
); 
5769         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
5774 static void sinterCommand(redisClient 
*c
) { 
5775     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
5778 static void sinterstoreCommand(redisClient 
*c
) { 
5779     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
/* Set-operation selectors. They are passed as the 'op' argument of
 * sunionDiffGenericCommand() (UNION and DIFF) and compared against 'op' in
 * zunionInterGenericCommand() (INTER). */
5782 #define REDIS_OP_UNION 0 
5783 #define REDIS_OP_DIFF 1 
5784 #define REDIS_OP_INTER 2 
5786 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
5787     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5790     robj 
*dstset 
= NULL
; 
5791     int j
, cardinality 
= 0; 
5793     for (j 
= 0; j 
< setsnum
; j
++) { 
5797                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5798                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5803         if (setobj
->type 
!= REDIS_SET
) { 
5805             addReply(c
,shared
.wrongtypeerr
); 
5808         dv
[j
] = setobj
->ptr
; 
5811     /* We need a temp set object to store our union. If the dstkey 
5812      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
5813      * this set object will be the resulting object to set into the target key*/ 
5814     dstset 
= createSetObject(); 
5816     /* Iterate all the elements of all the sets, add every element a single 
5817      * time to the result set */ 
5818     for (j 
= 0; j 
< setsnum
; j
++) { 
5819         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
5820         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
5822         di 
= dictGetIterator(dv
[j
]); 
5824         while((de 
= dictNext(di
)) != NULL
) { 
5827             /* dictAdd will not add the same element multiple times */ 
5828             ele 
= dictGetEntryKey(de
); 
5829             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
5830                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
5834             } else if (op 
== REDIS_OP_DIFF
) { 
5835                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
5840         dictReleaseIterator(di
); 
5842         /* result set is empty? Exit asap. */ 
5843         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; 
5846     /* Output the content of the resulting set, if not in STORE mode */ 
5848         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
5849         di 
= dictGetIterator(dstset
->ptr
); 
5850         while((de 
= dictNext(di
)) != NULL
) { 
5853             ele 
= dictGetEntryKey(de
); 
5854             addReplyBulk(c
,ele
); 
5856         dictReleaseIterator(di
); 
5857         decrRefCount(dstset
); 
5859         /* If we have a target key where to store the resulting set 
5860          * create this key with the result set inside */ 
5861         dbDelete(c
->db
,dstkey
); 
5862         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5863             dbAdd(c
->db
,dstkey
,dstset
); 
5864             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5866             decrRefCount(dstset
); 
5867             addReply(c
,shared
.czero
); 
5874 static void sunionCommand(redisClient 
*c
) { 
5875     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
5878 static void sunionstoreCommand(redisClient 
*c
) { 
5879     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
5882 static void sdiffCommand(redisClient 
*c
) { 
5883     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
5886 static void sdiffstoreCommand(redisClient 
*c
) { 
5887     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
5890 /* ==================================== ZSets =============================== */ 
5892 /* ZSETs are ordered sets using two data structures to hold the same elements 
5893  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
5896  * The elements are added to a hash table mapping Redis objects to scores. 
5897  * At the same time the elements are added to a skip list mapping scores 
5898  * to Redis objects (so objects are sorted by scores in this "view"). */ 
5900 /* This skiplist implementation is almost a C translation of the original 
5901  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
5902  * Alternative to Balanced Trees", modified in three ways: 
5903  * a) this implementation allows for repeated values. 
5904  * b) the comparison is not just by key (our 'score') but by satellite data. 
5905  * c) there is a back pointer, so it's a doubly linked list with the back 
5906  * pointers being only at "level 1". This allows to traverse the list 
5907  * from tail to head, useful for ZREVRANGE. */ 
5909 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
5910     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
5912     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
5914         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
5922 static zskiplist 
*zslCreate(void) { 
5926     zsl 
= zmalloc(sizeof(*zsl
)); 
5929     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
5930     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
5931         zsl
->header
->forward
[j
] = NULL
; 
5933         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
5934         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
5935             zsl
->header
->span
[j
] = 0; 
5937     zsl
->header
->backward 
= NULL
; 
5942 static void zslFreeNode(zskiplistNode 
*node
) { 
5943     decrRefCount(node
->obj
); 
5944     zfree(node
->forward
); 
5949 static void zslFree(zskiplist 
*zsl
) { 
5950     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
5952     zfree(zsl
->header
->forward
); 
5953     zfree(zsl
->header
->span
); 
5956         next 
= node
->forward
[0]; 
5963 static int zslRandomLevel(void) { 
5965     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
5967     return (level
<ZSKIPLIST_MAXLEVEL
) ? level 
: ZSKIPLIST_MAXLEVEL
; 
5970 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5971     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5972     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
5976     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5977         /* store rank that is crossed to reach the insert position */ 
5978         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
5980         while (x
->forward
[i
] && 
5981             (x
->forward
[i
]->score 
< score 
|| 
5982                 (x
->forward
[i
]->score 
== score 
&& 
5983                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
5984             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
5989     /* we assume the key is not already inside, since we allow duplicated 
5990      * scores, and the re-insertion of score and redis object should never 
5991      * happen since the caller of zslInsert() should test in the hash table 
5992      * if the element is already inside or not. */ 
5993     level 
= zslRandomLevel(); 
5994     if (level 
> zsl
->level
) { 
5995         for (i 
= zsl
->level
; i 
< level
; i
++) { 
5997             update
[i
] = zsl
->header
; 
5998             update
[i
]->span
[i
-1] = zsl
->length
; 
6002     x 
= zslCreateNode(level
,score
,obj
); 
6003     for (i 
= 0; i 
< level
; i
++) { 
6004         x
->forward
[i
] = update
[i
]->forward
[i
]; 
6005         update
[i
]->forward
[i
] = x
; 
6007         /* update span covered by update[i] as x is inserted here */ 
6009             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
6010             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
6014     /* increment span for untouched levels */ 
6015     for (i 
= level
; i 
< zsl
->level
; i
++) { 
6016         update
[i
]->span
[i
-1]++; 
6019     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
6021         x
->forward
[0]->backward 
= x
; 
6027 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ 
6028 void zslDeleteNode(zskiplist 
*zsl
, zskiplistNode 
*x
, zskiplistNode 
**update
) { 
6030     for (i 
= 0; i 
< zsl
->level
; i
++) { 
6031         if (update
[i
]->forward
[i
] == x
) { 
6033                 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
6035             update
[i
]->forward
[i
] = x
->forward
[i
]; 
6037             /* invariant: i > 0, because update[0]->forward[0] 
6038              * is always equal to x */ 
6039             update
[i
]->span
[i
-1] -= 1; 
6042     if (x
->forward
[0]) { 
6043         x
->forward
[0]->backward 
= x
->backward
; 
6045         zsl
->tail 
= x
->backward
; 
6047     while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
6052 /* Delete an element with matching score/object from the skiplist. */ 
6053 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
6054     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
6058     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6059         while (x
->forward
[i
] && 
6060             (x
->forward
[i
]->score 
< score 
|| 
6061                 (x
->forward
[i
]->score 
== score 
&& 
6062                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
6066     /* We may have multiple elements with the same score, what we need 
6067      * is to find the element with both the right score and object. */ 
6069     if (x 
&& score 
== x
->score 
&& equalStringObjects(x
->obj
,obj
)) { 
6070         zslDeleteNode(zsl
, x
, update
); 
6074         return 0; /* not found */ 
6076     return 0; /* not found */ 
6079 /* Delete all the elements with score between min and max from the skiplist. 
6080  * Min and max are inclusive, so every node with min <= score <= max is deleted. 
6081  * Note that this function takes the reference to the hash table view of the 
6082  * sorted set, in order to remove the elements from the hash table too. */ 
6083 static unsigned long zslDeleteRangeByScore(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
6084     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
6085     unsigned long removed 
= 0; 
6089     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6090         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
6094     /* We may have multiple elements with the same score, what we need 
6095      * is to find the element with both the right score and object. */ 
6097     while (x 
&& x
->score 
<= max
) { 
6098         zskiplistNode 
*next 
= x
->forward
[0]; 
6099         zslDeleteNode(zsl
, x
, update
); 
6100         dictDelete(dict
,x
->obj
); 
6105     return removed
; /* not found */ 
6108 /* Delete all the elements with rank between start and end from the skiplist. 
6109  * Start and end are inclusive. Note that start and end need to be 1-based */ 
6110 static unsigned long zslDeleteRangeByRank(zskiplist 
*zsl
, unsigned int start
, unsigned int end
, dict 
*dict
) { 
6111     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
6112     unsigned long traversed 
= 0, removed 
= 0; 
6116     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6117         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) < start
) { 
6118             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6126     while (x 
&& traversed 
<= end
) { 
6127         zskiplistNode 
*next 
= x
->forward
[0]; 
6128         zslDeleteNode(zsl
, x
, update
); 
6129         dictDelete(dict
,x
->obj
); 
6138 /* Find the first node having a score equal or greater than the specified one. 
6139  * Returns NULL if there is no match. */ 
6140 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
6145     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6146         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
6149     /* We may have multiple elements with the same score, what we need 
6150      * is to find the element with both the right score and object. */ 
6151     return x
->forward
[0]; 
6154 /* Find the rank for an element by both score and key. 
6155  * Returns 0 when the element cannot be found, rank otherwise. 
6156  * Note that the rank is 1-based due to the span of zsl->header to the 
6158 static unsigned long zslistTypeGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
6160     unsigned long rank 
= 0; 
6164     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6165         while (x
->forward
[i
] && 
6166             (x
->forward
[i
]->score 
< score 
|| 
6167                 (x
->forward
[i
]->score 
== score 
&& 
6168                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
6169             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6173         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
6174         if (x
->obj 
&& equalStringObjects(x
->obj
,o
)) { 
6181 /* Finds an element by its rank. The rank argument needs to be 1-based. */ 
6182 zskiplistNode
* zslistTypeGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
6184     unsigned long traversed 
= 0; 
6188     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6189         while (x
->forward
[i
] && (traversed 
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
) 
6191             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6194         if (traversed 
== rank
) { 
6201 /* The actual Z-commands implementations */ 
6203 /* This generic command implements both ZADD and ZINCRBY. 
6204  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
6205  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
6206 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
6211     if (isnan(scoreval
)) { 
6212         addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n")); 
6216     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
6217     if (zsetobj 
== NULL
) { 
6218         zsetobj 
= createZsetObject(); 
6219         dbAdd(c
->db
,key
,zsetobj
); 
6221         if (zsetobj
->type 
!= REDIS_ZSET
) { 
6222             addReply(c
,shared
.wrongtypeerr
); 
6228     /* Ok now since we implement both ZADD and ZINCRBY here the code 
6229      * needs to handle the two different conditions. It's all about setting 
6230      * '*score', that is, the new score to set, to the right value. */ 
6231     score 
= zmalloc(sizeof(double)); 
6235         /* Read the old score. If the element was not present starts from 0 */ 
6236         de 
= dictFind(zs
->dict
,ele
); 
6238             double *oldscore 
= dictGetEntryVal(de
); 
6239             *score 
= *oldscore 
+ scoreval
; 
6243         if (isnan(*score
)) { 
6245                 sdsnew("-ERR resulting score is Not A Number (nan)\r\n")); 
6247             /* Note that we don't need to check if the zset may be empty and 
6248              * should be removed here, as we can only obtain Nan as score if 
6249              * there was already an element in the sorted set. */ 
6256     /* What follows is a simple remove and re-insert operation that is common 
6257      * to both ZADD and ZINCRBY... */ 
6258     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
6259         /* case 1: New element */ 
6260         incrRefCount(ele
); /* added to hash */ 
6261         zslInsert(zs
->zsl
,*score
,ele
); 
6262         incrRefCount(ele
); /* added to skiplist */ 
6265             addReplyDouble(c
,*score
); 
6267             addReply(c
,shared
.cone
); 
6272         /* case 2: Score update operation */ 
6273         de 
= dictFind(zs
->dict
,ele
); 
6274         redisAssert(de 
!= NULL
); 
6275         oldscore 
= dictGetEntryVal(de
); 
6276         if (*score 
!= *oldscore
) { 
6279             /* Remove and insert the element in the skip list with new score */ 
6280             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
6281             redisAssert(deleted 
!= 0); 
6282             zslInsert(zs
->zsl
,*score
,ele
); 
6284             /* Update the score in the hash table */ 
6285             dictReplace(zs
->dict
,ele
,score
); 
6291             addReplyDouble(c
,*score
); 
6293             addReply(c
,shared
.czero
); 
6297 static void zaddCommand(redisClient 
*c
) { 
6300     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
6301     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
6304 static void zincrbyCommand(redisClient 
*c
) { 
6307     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
6308     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
6311 static void zremCommand(redisClient 
*c
) { 
6318     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6319         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6322     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6324         addReply(c
,shared
.czero
); 
6327     /* Delete from the skiplist */ 
6328     oldscore 
= dictGetEntryVal(de
); 
6329     deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
6330     redisAssert(deleted 
!= 0); 
6332     /* Delete from the hash table */ 
6333     dictDelete(zs
->dict
,c
->argv
[2]); 
6334     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6335     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6337     addReply(c
,shared
.cone
); 
6340 static void zremrangebyscoreCommand(redisClient 
*c
) { 
6347     if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) || 
6348         (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return; 
6350     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6351         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6354     deleted 
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
); 
6355     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6356     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6357     server
.dirty 
+= deleted
; 
6358     addReplyLongLong(c
,deleted
); 
6361 static void zremrangebyrankCommand(redisClient 
*c
) { 
6369     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
6370         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
6372     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6373         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6375     llen 
= zs
->zsl
->length
; 
6377     /* convert negative indexes */ 
6378     if (start 
< 0) start 
= llen
+start
; 
6379     if (end 
< 0) end 
= llen
+end
; 
6380     if (start 
< 0) start 
= 0; 
6381     if (end 
< 0) end 
= 0; 
6383     /* indexes sanity checks */ 
6384     if (start 
> end 
|| start 
>= llen
) { 
6385         addReply(c
,shared
.czero
); 
6388     if (end 
>= llen
) end 
= llen
-1; 
6390     /* increment start and end because zsl*Rank functions 
6391      * use 1-based rank */ 
6392     deleted 
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
); 
6393     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6394     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6395     server
.dirty 
+= deleted
; 
6396     addReplyLongLong(c
, deleted
); 
6404 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) { 
6405     zsetopsrc 
*d1 
= (void*) s1
, *d2 
= (void*) s2
; 
6406     unsigned long size1
, size2
; 
6407     size1 
= d1
->dict 
? dictSize(d1
->dict
) : 0; 
6408     size2 
= d2
->dict 
? dictSize(d2
->dict
) : 0; 
6409     return size1 
- size2
; 
/* Aggregation modes accepted by zunionInterAggregate(); the generic
 * ZUNION/ZINTER command selects one of them from the "sum", "min" and "max"
 * keywords and defaults to REDIS_AGGR_SUM. */
6412 #define REDIS_AGGR_SUM 1 
6413 #define REDIS_AGGR_MIN 2 
6414 #define REDIS_AGGR_MAX 3 
/* Score carried by dict entry _e: 1.0 when the entry has a NULL value (plain
 * sets are filled with dictAdd(...,NULL)), otherwise the double the value
 * points to. The argument is evaluated twice, so it must be free of side
 * effects. */
6415 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e)) 
6417 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) { 
6418     if (aggregate 
== REDIS_AGGR_SUM
) { 
6419         *target 
= *target 
+ val
; 
6420     } else if (aggregate 
== REDIS_AGGR_MIN
) { 
6421         *target 
= val 
< *target 
? val 
: *target
; 
6422     } else if (aggregate 
== REDIS_AGGR_MAX
) { 
6423         *target 
= val 
> *target 
? val 
: *target
; 
6426         redisPanic("Unknown ZUNION/INTER aggregate type"); 
6430 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
6432     int aggregate 
= REDIS_AGGR_SUM
; 
6439     /* expect setnum input keys to be given */ 
6440     setnum 
= atoi(c
->argv
[2]->ptr
); 
6442         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n")); 
6446     /* test if the expected number of keys would overflow */ 
6447     if (3+setnum 
> c
->argc
) { 
6448         addReply(c
,shared
.syntaxerr
); 
6452     /* read keys to be used for input */ 
6453     src 
= zmalloc(sizeof(zsetopsrc
) * setnum
); 
6454     for (i 
= 0, j 
= 3; i 
< setnum
; i
++, j
++) { 
6455         robj 
*obj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
6459             if (obj
->type 
== REDIS_ZSET
) { 
6460                 src
[i
].dict 
= ((zset
*)obj
->ptr
)->dict
; 
6461             } else if (obj
->type 
== REDIS_SET
) { 
6462                 src
[i
].dict 
= (obj
->ptr
); 
6465                 addReply(c
,shared
.wrongtypeerr
); 
6470         /* default all weights to 1 */ 
6471         src
[i
].weight 
= 1.0; 
6474     /* parse optional extra arguments */ 
6476         int remaining 
= c
->argc 
- j
; 
6479             if (remaining 
>= (setnum 
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
6481                 for (i 
= 0; i 
< setnum
; i
++, j
++, remaining
--) { 
6482                     if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
) 
6485             } else if (remaining 
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) { 
6487                 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) { 
6488                     aggregate 
= REDIS_AGGR_SUM
; 
6489                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) { 
6490                     aggregate 
= REDIS_AGGR_MIN
; 
6491                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) { 
6492                     aggregate 
= REDIS_AGGR_MAX
; 
6495                     addReply(c
,shared
.syntaxerr
); 
6501                 addReply(c
,shared
.syntaxerr
); 
6507     /* sort sets from the smallest to largest, this will improve our 
6508      * algorithm's performance */ 
6509     qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
); 
6511     dstobj 
= createZsetObject(); 
6512     dstzset 
= dstobj
->ptr
; 
6514     if (op 
== REDIS_OP_INTER
) { 
6515         /* skip going over all entries if the smallest zset is NULL or empty */ 
6516         if (src
[0].dict 
&& dictSize(src
[0].dict
) > 0) { 
6517             /* precondition: as src[0].dict is non-empty and the zsets are ordered 
6518              * from small to large, all src[i > 0].dict are non-empty too */ 
6519             di 
= dictGetIterator(src
[0].dict
); 
6520             while((de 
= dictNext(di
)) != NULL
) { 
6521                 double *score 
= zmalloc(sizeof(double)), value
; 
6522                 *score 
= src
[0].weight 
* zunionInterDictValue(de
); 
6524                 for (j 
= 1; j 
< setnum
; j
++) { 
6525                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
6527                         value 
= src
[j
].weight 
* zunionInterDictValue(other
); 
6528                         zunionInterAggregate(score
, value
, aggregate
); 
6534                 /* skip entry when not present in every source dict */ 
6538                     robj 
*o 
= dictGetEntryKey(de
); 
6539                     dictAdd(dstzset
->dict
,o
,score
); 
6540                     incrRefCount(o
); /* added to dictionary */ 
6541                     zslInsert(dstzset
->zsl
,*score
,o
); 
6542                     incrRefCount(o
); /* added to skiplist */ 
6545             dictReleaseIterator(di
); 
6547     } else if (op 
== REDIS_OP_UNION
) { 
6548         for (i 
= 0; i 
< setnum
; i
++) { 
6549             if (!src
[i
].dict
) continue; 
6551             di 
= dictGetIterator(src
[i
].dict
); 
6552             while((de 
= dictNext(di
)) != NULL
) { 
6553                 /* skip key when already processed */ 
6554                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
6556                 double *score 
= zmalloc(sizeof(double)), value
; 
6557                 *score 
= src
[i
].weight 
* zunionInterDictValue(de
); 
6559                 /* because the zsets are sorted by size, it's only possible 
6560                  * for sets at larger indices to hold this entry */ 
6561                 for (j 
= (i
+1); j 
< setnum
; j
++) { 
6562                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
6564                         value 
= src
[j
].weight 
* zunionInterDictValue(other
); 
6565                         zunionInterAggregate(score
, value
, aggregate
); 
6569                 robj 
*o 
= dictGetEntryKey(de
); 
6570                 dictAdd(dstzset
->dict
,o
,score
); 
6571                 incrRefCount(o
); /* added to dictionary */ 
6572                 zslInsert(dstzset
->zsl
,*score
,o
); 
6573                 incrRefCount(o
); /* added to skiplist */ 
6575             dictReleaseIterator(di
); 
6578         /* unknown operator */ 
6579         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
6582     dbDelete(c
->db
,dstkey
); 
6583     if (dstzset
->zsl
->length
) { 
6584         dbAdd(c
->db
,dstkey
,dstobj
); 
6585         addReplyLongLong(c
, dstzset
->zsl
->length
); 
6588         decrRefCount(dstobj
); 
6589         addReply(c
, shared
.czero
); 
6594 static void zunionstoreCommand(redisClient 
*c
) { 
6595     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
6598 static void zinterstoreCommand(redisClient 
*c
) { 
6599     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
6602 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
6614     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
6615         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
6617     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
6619     } else if (c
->argc 
>= 5) { 
6620         addReply(c
,shared
.syntaxerr
); 
6624     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
6625          || checkType(c
,o
,REDIS_ZSET
)) return; 
6630     /* convert negative indexes */ 
6631     if (start 
< 0) start 
= llen
+start
; 
6632     if (end 
< 0) end 
= llen
+end
; 
6633     if (start 
< 0) start 
= 0; 
6634     if (end 
< 0) end 
= 0; 
6636     /* indexes sanity checks */ 
6637     if (start 
> end 
|| start 
>= llen
) { 
6638         /* Out of range start or start > end result in empty list */ 
6639         addReply(c
,shared
.emptymultibulk
); 
6642     if (end 
>= llen
) end 
= llen
-1; 
6643     rangelen 
= (end
-start
)+1; 
6645     /* check if starting point is trivial, before searching 
6646      * the element in log(N) time */ 
6648         ln 
= start 
== 0 ? zsl
->tail 
: zslistTypeGetElementByRank(zsl
, llen
-start
); 
6651             zsl
->header
->forward
[0] : zslistTypeGetElementByRank(zsl
, start
+1); 
6654     /* Return the result in form of a multi-bulk reply */ 
6655     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
6656         withscores 
? (rangelen
*2) : rangelen
)); 
6657     for (j 
= 0; j 
< rangelen
; j
++) { 
6659         addReplyBulk(c
,ele
); 
6661             addReplyDouble(c
,ln
->score
); 
6662         ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
6666 static void zrangeCommand(redisClient 
*c
) { 
6667     zrangeGenericCommand(c
,0); 
6670 static void zrevrangeCommand(redisClient 
*c
) { 
6671     zrangeGenericCommand(c
,1); 
6674 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
6675  * If justcount is non-zero, just the count is returned. */ 
6676 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
6679     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
6680     int offset 
= 0, limit 
= -1; 
6684     /* Parse the min-max interval. If one of the values is prefixed 
6685      * by the "(" character, it's considered "open". For instance 
6686      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
6687      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
6688     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
6689         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
6692         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
6694     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
6695         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
6698         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
6701     /* Parse "WITHSCORES": note that if the command was called with 
6702      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
6703      * enter the following paths to parse WITHSCORES and LIMIT. */ 
6704     if (c
->argc 
== 5 || c
->argc 
== 8) { 
6705         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
6710     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
6714             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
6719     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
6720         addReply(c
,shared
.syntaxerr
); 
6722     } else if (c
->argc 
== (7 + withscores
)) { 
6723         offset 
= atoi(c
->argv
[5]->ptr
); 
6724         limit 
= atoi(c
->argv
[6]->ptr
); 
6725         if (offset 
< 0) offset 
= 0; 
6728     /* Ok, lookup the key and get the range */ 
6729     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6731         addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6733         if (o
->type 
!= REDIS_ZSET
) { 
6734             addReply(c
,shared
.wrongtypeerr
); 
6736             zset 
*zsetobj 
= o
->ptr
; 
6737             zskiplist 
*zsl 
= zsetobj
->zsl
; 
6739             robj 
*ele
, *lenobj 
= NULL
; 
6740             unsigned long rangelen 
= 0; 
6742             /* Get the first node with the score >= min, or with 
6743              * score > min if 'minex' is true. */ 
6744             ln 
= zslFirstWithScore(zsl
,min
); 
6745             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
6748                 /* No element matching the specified interval */ 
6749                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6753             /* We don't know in advance how many matching elements there 
6754              * are in the list, so we push this object that will represent 
6755              * the multi-bulk length in the output buffer, and will "fix" 
6758                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
6760                 decrRefCount(lenobj
); 
6763             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
6766                     ln 
= ln
->forward
[0]; 
6769                 if (limit 
== 0) break; 
6772                     addReplyBulk(c
,ele
); 
6774                         addReplyDouble(c
,ln
->score
); 
6776                 ln 
= ln
->forward
[0]; 
6778                 if (limit 
> 0) limit
--; 
6781                 addReplyLongLong(c
,(long)rangelen
); 
6783                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
6784                      withscores 
? (rangelen
*2) : rangelen
); 
6790 static void zrangebyscoreCommand(redisClient 
*c
) { 
6791     genericZrangebyscoreCommand(c
,0); 
6794 static void zcountCommand(redisClient 
*c
) { 
6795     genericZrangebyscoreCommand(c
,1); 
6798 static void zcardCommand(redisClient 
*c
) { 
6802     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6803         checkType(c
,o
,REDIS_ZSET
)) return; 
6806     addReplyUlong(c
,zs
->zsl
->length
); 
6809 static void zscoreCommand(redisClient 
*c
) { 
6814     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6815         checkType(c
,o
,REDIS_ZSET
)) return; 
6818     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6820         addReply(c
,shared
.nullbulk
); 
6822         double *score 
= dictGetEntryVal(de
); 
6824         addReplyDouble(c
,*score
); 
6828 static void zrankGenericCommand(redisClient 
*c
, int reverse
) { 
6836     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6837         checkType(c
,o
,REDIS_ZSET
)) return; 
6841     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6843         addReply(c
,shared
.nullbulk
); 
6847     score 
= dictGetEntryVal(de
); 
6848     rank 
= zslistTypeGetRank(zsl
, *score
, c
->argv
[2]); 
6851             addReplyLongLong(c
, zsl
->length 
- rank
); 
6853             addReplyLongLong(c
, rank
-1); 
6856         addReply(c
,shared
.nullbulk
); 
6860 static void zrankCommand(redisClient 
*c
) { 
6861     zrankGenericCommand(c
, 0); 
6864 static void zrevrankCommand(redisClient 
*c
) { 
6865     zrankGenericCommand(c
, 1); 
6868 /* ========================= Hashes utility functions ======================= */ 
6869 #define REDIS_HASH_KEY 1 
6870 #define REDIS_HASH_VALUE 2 
6872 /* Check the length of a number of objects to see if we need to convert a 
6873  * zipmap to a real hash. Note that we only check string encoded objects 
6874  * as their string length can be queried in constant time. */ 
6875 static void hashTypeTryConversion(robj 
*subject
, robj 
**argv
, int start
, int end
) { 
6877     if (subject
->encoding 
!= REDIS_ENCODING_ZIPMAP
) return; 
6879     for (i 
= start
; i 
<= end
; i
++) { 
6880         if (argv
[i
]->encoding 
== REDIS_ENCODING_RAW 
&& 
6881             sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) 
6883             convertToRealHash(subject
); 
6889 /* Encode given objects in-place when the hash uses a dict. */ 
6890 static void hashTypeTryObjectEncoding(robj 
*subject
, robj 
**o1
, robj 
**o2
) { 
6891     if (subject
->encoding 
== REDIS_ENCODING_HT
) { 
6892         if (o1
) *o1 
= tryObjectEncoding(*o1
); 
6893         if (o2
) *o2 
= tryObjectEncoding(*o2
); 
6897 /* Get the value from a hash identified by key. Returns either a string 
6898  * object or NULL if the value cannot be found. The refcount of the object 
6899  * is always increased by 1 when the value was found. */ 
6900 static robj 
*hashTypeGet(robj 
*o
, robj 
*key
) { 
6902     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6905         key 
= getDecodedObject(key
); 
6906         if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) { 
6907             value 
= createStringObject((char*)v
,vlen
); 
6911         dictEntry 
*de 
= dictFind(o
->ptr
,key
); 
6913             value 
= dictGetEntryVal(de
); 
6914             incrRefCount(value
); 
6920 /* Test if the key exists in the given hash. Returns 1 if the key 
6921  * exists and 0 when it doesn't. */ 
6922 static int hashTypeExists(robj 
*o
, robj 
*key
) { 
6923     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6924         key 
= getDecodedObject(key
); 
6925         if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) { 
6931         if (dictFind(o
->ptr
,key
) != NULL
) { 
6938 /* Add an element, discard the old if the key already exists. 
6939  * Return 0 on insert and 1 on update. */ 
6940 static int hashTypeSet(robj 
*o
, robj 
*key
, robj 
*value
) { 
6942     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6943         key 
= getDecodedObject(key
); 
6944         value 
= getDecodedObject(value
); 
6945         o
->ptr 
= zipmapSet(o
->ptr
, 
6946             key
->ptr
,sdslen(key
->ptr
), 
6947             value
->ptr
,sdslen(value
->ptr
), &update
); 
6949         decrRefCount(value
); 
6951         /* Check if the zipmap needs to be upgraded to a real hash table */ 
6952         if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
) 
6953             convertToRealHash(o
); 
6955         if (dictReplace(o
->ptr
,key
,value
)) { 
6962         incrRefCount(value
); 
6967 /* Delete an element from a hash. 
6968  * Return 1 on deleted and 0 on not found. */ 
6969 static int hashTypeDelete(robj 
*o
, robj 
*key
) { 
6971     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6972         key 
= getDecodedObject(key
); 
6973         o
->ptr 
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
); 
6976         deleted 
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
; 
6977         /* Always check if the dictionary needs a resize after a delete. */ 
6978         if (deleted 
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
); 
6983 /* Return the number of elements in a hash. */ 
6984 static unsigned long hashTypeLength(robj 
*o
) { 
6985     return (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) ? 
6986         zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
); 
6989 /* Structure to hold hash iteration abstraction. Note that iteration over 
6990  * hashes involves both fields and values. Because it is possible that 
6991  * not both are required, store pointers in the iterator to avoid 
6992  * unnecessary memory allocation for fields/values. */ 
6996     unsigned char *zk
, *zv
; 
6997     unsigned int zklen
, zvlen
; 
7003 static hashTypeIterator 
*hashTypeInitIterator(robj 
*subject
) { 
7004     hashTypeIterator 
*hi 
= zmalloc(sizeof(hashTypeIterator
)); 
7005     hi
->encoding 
= subject
->encoding
; 
7006     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
7007         hi
->zi 
= zipmapRewind(subject
->ptr
); 
7008     } else if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
7009         hi
->di 
= dictGetIterator(subject
->ptr
); 
7016 static void hashTypeReleaseIterator(hashTypeIterator 
*hi
) { 
7017     if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
7018         dictReleaseIterator(hi
->di
); 
7023 /* Move to the next entry in the hash. Return REDIS_OK when the next entry 
7024  * could be found and REDIS_ERR when the iterator reaches the end. */ 
7025 static int hashTypeNext(hashTypeIterator 
*hi
) { 
7026     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
7027         if ((hi
->zi 
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
, 
7028             &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
; 
7030         if ((hi
->de 
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
; 
7035 /* Get key or value object at current iteration position. 
7036  * This increases the refcount of the field object by 1. */ 
7037 static robj 
*hashTypeCurrent(hashTypeIterator 
*hi
, int what
) { 
7039     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
7040         if (what 
& REDIS_HASH_KEY
) { 
7041             o 
= createStringObject((char*)hi
->zk
,hi
->zklen
); 
7043             o 
= createStringObject((char*)hi
->zv
,hi
->zvlen
); 
7046         if (what 
& REDIS_HASH_KEY
) { 
7047             o 
= dictGetEntryKey(hi
->de
); 
7049             o 
= dictGetEntryVal(hi
->de
); 
7056 static robj 
*hashTypeLookupWriteOrCreate(redisClient 
*c
, robj 
*key
) { 
7057     robj 
*o 
= lookupKeyWrite(c
->db
,key
); 
7059         o 
= createHashObject(); 
7062         if (o
->type 
!= REDIS_HASH
) { 
7063             addReply(c
,shared
.wrongtypeerr
); 
7070 /* ============================= Hash commands ============================== */ 
7071 static void hsetCommand(redisClient 
*c
) { 
7075     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7076     hashTypeTryConversion(o
,c
->argv
,2,3); 
7077     hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
7078     update 
= hashTypeSet(o
,c
->argv
[2],c
->argv
[3]); 
7079     addReply(c
, update 
? shared
.czero 
: shared
.cone
); 
7083 static void hsetnxCommand(redisClient 
*c
) { 
7085     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7086     hashTypeTryConversion(o
,c
->argv
,2,3); 
7088     if (hashTypeExists(o
, c
->argv
[2])) { 
7089         addReply(c
, shared
.czero
); 
7091         hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
7092         hashTypeSet(o
,c
->argv
[2],c
->argv
[3]); 
7093         addReply(c
, shared
.cone
); 
7098 static void hmsetCommand(redisClient 
*c
) { 
7102     if ((c
->argc 
% 2) == 1) { 
7103         addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n")); 
7107     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7108     hashTypeTryConversion(o
,c
->argv
,2,c
->argc
-1); 
7109     for (i 
= 2; i 
< c
->argc
; i 
+= 2) { 
7110         hashTypeTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]); 
7111         hashTypeSet(o
,c
->argv
[i
],c
->argv
[i
+1]); 
7113     addReply(c
, shared
.ok
); 
7117 static void hincrbyCommand(redisClient 
*c
) { 
7118     long long value
, incr
; 
7119     robj 
*o
, *current
, *new; 
7121     if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return; 
7122     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7123     if ((current 
= hashTypeGet(o
,c
->argv
[2])) != NULL
) { 
7124         if (getLongLongFromObjectOrReply(c
,current
,&value
, 
7125             "hash value is not an integer") != REDIS_OK
) { 
7126             decrRefCount(current
); 
7129         decrRefCount(current
); 
7135     new = createStringObjectFromLongLong(value
); 
7136     hashTypeTryObjectEncoding(o
,&c
->argv
[2],NULL
); 
7137     hashTypeSet(o
,c
->argv
[2],new); 
7139     addReplyLongLong(c
,value
); 
7143 static void hgetCommand(redisClient 
*c
) { 
7145     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
7146         checkType(c
,o
,REDIS_HASH
)) return; 
7148     if ((value 
= hashTypeGet(o
,c
->argv
[2])) != NULL
) { 
7149         addReplyBulk(c
,value
); 
7150         decrRefCount(value
); 
7152         addReply(c
,shared
.nullbulk
); 
7156 static void hmgetCommand(redisClient 
*c
) { 
7159     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
7160     if (o 
!= NULL 
&& o
->type 
!= REDIS_HASH
) { 
7161         addReply(c
,shared
.wrongtypeerr
); 
7164     /* Note the check for o != NULL happens inside the loop. This is 
7165      * done because objects that cannot be found are considered to be 
7166      * an empty hash. The reply should then be a series of NULLs. */ 
7167     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2)); 
7168     for (i 
= 2; i 
< c
->argc
; i
++) { 
7169         if (o 
!= NULL 
&& (value 
= hashTypeGet(o
,c
->argv
[i
])) != NULL
) { 
7170             addReplyBulk(c
,value
); 
7171             decrRefCount(value
); 
7173             addReply(c
,shared
.nullbulk
); 
7178 static void hdelCommand(redisClient 
*c
) { 
7180     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7181         checkType(c
,o
,REDIS_HASH
)) return; 
7183     if (hashTypeDelete(o
,c
->argv
[2])) { 
7184         if (hashTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
7185         addReply(c
,shared
.cone
); 
7188         addReply(c
,shared
.czero
); 
7192 static void hlenCommand(redisClient 
*c
) { 
7194     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7195         checkType(c
,o
,REDIS_HASH
)) return; 
7197     addReplyUlong(c
,hashTypeLength(o
)); 
7200 static void genericHgetallCommand(redisClient 
*c
, int flags
) { 
7201     robj 
*o
, *lenobj
, *obj
; 
7202     unsigned long count 
= 0; 
7203     hashTypeIterator 
*hi
; 
7205     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
7206         || checkType(c
,o
,REDIS_HASH
)) return; 
7208     lenobj 
= createObject(REDIS_STRING
,NULL
); 
7210     decrRefCount(lenobj
); 
7212     hi 
= hashTypeInitIterator(o
); 
7213     while (hashTypeNext(hi
) != REDIS_ERR
) { 
7214         if (flags 
& REDIS_HASH_KEY
) { 
7215             obj 
= hashTypeCurrent(hi
,REDIS_HASH_KEY
); 
7216             addReplyBulk(c
,obj
); 
7220         if (flags 
& REDIS_HASH_VALUE
) { 
7221             obj 
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
); 
7222             addReplyBulk(c
,obj
); 
7227     hashTypeReleaseIterator(hi
); 
7229     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",count
); 
7232 static void hkeysCommand(redisClient 
*c
) { 
7233     genericHgetallCommand(c
,REDIS_HASH_KEY
); 
7236 static void hvalsCommand(redisClient 
*c
) { 
7237     genericHgetallCommand(c
,REDIS_HASH_VALUE
); 
7240 static void hgetallCommand(redisClient 
*c
) { 
7241     genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
); 
7244 static void hexistsCommand(redisClient 
*c
) { 
7246     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7247         checkType(c
,o
,REDIS_HASH
)) return; 
7249     addReply(c
, hashTypeExists(o
,c
->argv
[2]) ? shared
.cone 
: shared
.czero
); 
7252 static void convertToRealHash(robj 
*o
) { 
7253     unsigned char *key
, *val
, *p
, *zm 
= o
->ptr
; 
7254     unsigned int klen
, vlen
; 
7255     dict 
*dict 
= dictCreate(&hashDictType
,NULL
); 
7257     assert(o
->type 
== REDIS_HASH 
&& o
->encoding 
!= REDIS_ENCODING_HT
); 
7258     p 
= zipmapRewind(zm
); 
7259     while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
7260         robj 
*keyobj
, *valobj
; 
7262         keyobj 
= createStringObject((char*)key
,klen
); 
7263         valobj 
= createStringObject((char*)val
,vlen
); 
7264         keyobj 
= tryObjectEncoding(keyobj
); 
7265         valobj 
= tryObjectEncoding(valobj
); 
7266         dictAdd(dict
,keyobj
,valobj
); 
7268     o
->encoding 
= REDIS_ENCODING_HT
; 
7273 /* ========================= Non type-specific commands  ==================== */ 
7275 static void flushdbCommand(redisClient 
*c
) { 
7276     server
.dirty 
+= dictSize(c
->db
->dict
); 
7277     touchWatchedKeysOnFlush(c
->db
->id
); 
7278     dictEmpty(c
->db
->dict
); 
7279     dictEmpty(c
->db
->expires
); 
7280     addReply(c
,shared
.ok
); 
7283 static void flushallCommand(redisClient 
*c
) { 
7284     touchWatchedKeysOnFlush(-1); 
7285     server
.dirty 
+= emptyDb(); 
7286     addReply(c
,shared
.ok
); 
7287     if (server
.bgsavechildpid 
!= -1) { 
7288         kill(server
.bgsavechildpid
,SIGKILL
); 
7289         rdbRemoveTempFile(server
.bgsavechildpid
); 
7291     rdbSave(server
.dbfilename
); 
7295 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
7296     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
7298     so
->pattern 
= pattern
; 
7302 /* Return the value associated to the key with a name obtained 
7303  * substituting the first occurrence of '*' in 'pattern' with 'subst'. 
7304  * The returned object will always have its refcount increased by 1 
7305  * when it is non-NULL. */ 
7306 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
7309     robj keyobj
, fieldobj
, *o
; 
7310     int prefixlen
, sublen
, postfixlen
, fieldlen
; 
7311     /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
7315         char buf
[REDIS_SORTKEY_MAX
+1]; 
7316     } keyname
, fieldname
; 
7318     /* If the pattern is "#" return the substitution object itself in order 
7319      * to implement the "SORT ... GET #" feature. */ 
7320     spat 
= pattern
->ptr
; 
7321     if (spat
[0] == '#' && spat
[1] == '\0') { 
7322         incrRefCount(subst
); 
7326     /* The substitution object may be specially encoded. If so we create 
7327      * a decoded object on the fly. Otherwise getDecodedObject will just 
7328      * increment the ref count, that we'll decrement later. */ 
7329     subst 
= getDecodedObject(subst
); 
7332     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
7333     p 
= strchr(spat
,'*'); 
7335         decrRefCount(subst
); 
7339     /* Find out if we're dealing with a hash dereference. */ 
7340     if ((f 
= strstr(p
+1, "->")) != NULL
) { 
7341         fieldlen 
= sdslen(spat
)-(f
-spat
); 
7342         /* this also copies \0 character */ 
7343         memcpy(fieldname
.buf
,f
+2,fieldlen
-1); 
7344         fieldname
.len 
= fieldlen
-2; 
7350     sublen 
= sdslen(ssub
); 
7351     postfixlen 
= sdslen(spat
)-(prefixlen
+1)-fieldlen
; 
7352     memcpy(keyname
.buf
,spat
,prefixlen
); 
7353     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
7354     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
7355     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
7356     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
7357     decrRefCount(subst
); 
7359     /* Lookup substituted key */ 
7360     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)); 
7361     o 
= lookupKeyRead(db
,&keyobj
); 
7362     if (o 
== NULL
) return NULL
; 
7365         if (o
->type 
!= REDIS_HASH 
|| fieldname
.len 
< 1) return NULL
; 
7367         /* Retrieve value from hash by the field name. This operation 
7368          * already increases the refcount of the returned object. */ 
7369         initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2)); 
7370         o 
= hashTypeGet(o
, &fieldobj
); 
7372         if (o
->type 
!= REDIS_STRING
) return NULL
; 
7374         /* Every object that this function returns needs to have its refcount 
7375          * increased. sortCommand decreases it again. */ 
7382 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
7383  * the additional parameter is not standard but a BSD-specific we have to 
7384  * pass sorting parameters via the global 'server' structure */ 
7385 static int sortCompare(const void *s1
, const void *s2
) { 
7386     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
7389     if (!server
.sort_alpha
) { 
7390         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
7391         if (so1
->u
.score 
> so2
->u
.score
) { 
7393         } else if (so1
->u
.score 
< so2
->u
.score
) { 
7399         /* Alphanumeric sorting */ 
7400         if (server
.sort_bypattern
) { 
7401             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
7402                 /* At least one compare object is NULL */ 
7403                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
7405                 else if (so1
->u
.cmpobj 
== NULL
) 
7410                 /* We have both the objects, use strcoll */ 
7411                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
7414             /* Compare elements directly. */ 
7415             cmp 
= compareStringObjects(so1
->obj
,so2
->obj
); 
7418     return server
.sort_desc 
? -cmp 
: cmp
; 
7421 /* The SORT command is the most complex command in Redis. Warning: this code 
7422  * is optimized for speed and a bit less for readability */ 
7423 static void sortCommand(redisClient 
*c
) { 
7425     unsigned int outputlen 
= 0; 
7426     int desc 
= 0, alpha 
= 0; 
7427     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
7428     int j
, dontsort 
= 0, vectorlen
; 
7429     int getop 
= 0; /* GET operation counter */ 
7430     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
7431     redisSortObject 
*vector
; /* Resulting vector to sort */ 
7433     /* Lookup the key to sort. It must be of the right types */ 
7434     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
7435     if (sortval 
== NULL
) { 
7436         addReply(c
,shared
.emptymultibulk
); 
7439     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
7440         sortval
->type 
!= REDIS_ZSET
) 
7442         addReply(c
,shared
.wrongtypeerr
); 
7446     /* Create a list of operations to perform for every sorted element. 
7447      * Operations can be GET/DEL/INCR/DECR */ 
7448     operations 
= listCreate(); 
7449     listSetFreeMethod(operations
,zfree
); 
7452     /* Now we need to protect sortval incrementing its count, in the future 
7453      * SORT may have options able to overwrite/delete keys during the sorting 
7454      * and the sorted key itself may get destroyed */ 
7455     incrRefCount(sortval
); 
7457     /* The SORT command has an SQL-alike syntax, parse it */ 
7458     while(j 
< c
->argc
) { 
7459         int leftargs 
= c
->argc
-j
-1; 
7460         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
7462         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
7464         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
7466         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
7467             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
7468             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
7470         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
7471             storekey 
= c
->argv
[j
+1]; 
7473         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
7474             sortby 
= c
->argv
[j
+1]; 
7475             /* If the BY pattern does not contain '*', i.e. it is constant, 
7476              * we don't need to sort nor to lookup the weight keys. */ 
7477             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
7479         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
7480             listAddNodeTail(operations
,createSortOperation( 
7481                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
7485             decrRefCount(sortval
); 
7486             listRelease(operations
); 
7487             addReply(c
,shared
.syntaxerr
); 
7493     /* Load the sorting vector with all the objects to sort */ 
7494     switch(sortval
->type
) { 
7495     case REDIS_LIST
: vectorlen 
= listTypeLength(sortval
); break; 
7496     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
7497     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
7498     default: vectorlen 
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */ 
7500     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
7503     if (sortval
->type 
== REDIS_LIST
) { 
7504         listTypeIterator 
*li 
= listTypeInitIterator(sortval
,0,REDIS_TAIL
); 
7505         listTypeEntry entry
; 
7506         while(listTypeNext(li
,&entry
)) { 
7507             vector
[j
].obj 
= listTypeGet(&entry
); 
7508             vector
[j
].u
.score 
= 0; 
7509             vector
[j
].u
.cmpobj 
= NULL
; 
7512         listTypeReleaseIterator(li
); 
7518         if (sortval
->type 
== REDIS_SET
) { 
7521             zset 
*zs 
= sortval
->ptr
; 
7525         di 
= dictGetIterator(set
); 
7526         while((setele 
= dictNext(di
)) != NULL
) { 
7527             vector
[j
].obj 
= dictGetEntryKey(setele
); 
7528             vector
[j
].u
.score 
= 0; 
7529             vector
[j
].u
.cmpobj 
= NULL
; 
7532         dictReleaseIterator(di
); 
7534     redisAssert(j 
== vectorlen
); 
7536     /* Now it's time to load the right scores in the sorting vector */ 
7537     if (dontsort 
== 0) { 
7538         for (j 
= 0; j 
< vectorlen
; j
++) { 
7541                 /* lookup value to sort by */ 
7542                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
7543                 if (!byval
) continue; 
7545                 /* use object itself to sort by */ 
7546                 byval 
= vector
[j
].obj
; 
7550                 if (sortby
) vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
7552                 if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
7553                     vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
7554                 } else if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
7555                     /* Don't need to decode the object if it's 
7556                      * integer-encoded (the only encoding supported) so 
7557                      * far. We can just cast it */ 
7558                     vector
[j
].u
.score 
= (long)byval
->ptr
; 
7560                     redisAssert(1 != 1); 
7564             /* when the object was retrieved using lookupKeyByPattern, 
7565              * its refcount needs to be decreased. */ 
7567                 decrRefCount(byval
); 
7572     /* We are ready to sort the vector... perform a bit of sanity check 
7573      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
7574     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
7575     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
7576     if (start 
>= vectorlen
) { 
7577         start 
= vectorlen
-1; 
7580     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
7582     if (dontsort 
== 0) { 
7583         server
.sort_desc 
= desc
; 
7584         server
.sort_alpha 
= alpha
; 
7585         server
.sort_bypattern 
= sortby 
? 1 : 0; 
7586         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
7587             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
7589             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
7592     /* Send command output to the output buffer, performing the specified 
7593      * GET/DEL/INCR/DECR operations if any. */ 
7594     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
7595     if (storekey 
== NULL
) { 
7596         /* STORE option not specified, send the sorting result to the client */ 
7597         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
7598         for (j 
= start
; j 
<= end
; j
++) { 
7602             if (!getop
) addReplyBulk(c
,vector
[j
].obj
); 
7603             listRewind(operations
,&li
); 
7604             while((ln 
= listNext(&li
))) { 
7605                 redisSortOperation 
*sop 
= ln
->value
; 
7606                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7609                 if (sop
->type 
== REDIS_SORT_GET
) { 
7611                         addReply(c
,shared
.nullbulk
); 
7613                         addReplyBulk(c
,val
); 
7617                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
7622         robj 
*sobj 
= createZiplistObject(); 
7624         /* STORE option specified, set the sorting result as a List object */ 
7625         for (j 
= start
; j 
<= end
; j
++) { 
7630                 listTypePush(sobj
,vector
[j
].obj
,REDIS_TAIL
); 
7632                 listRewind(operations
,&li
); 
7633                 while((ln 
= listNext(&li
))) { 
7634                     redisSortOperation 
*sop 
= ln
->value
; 
7635                     robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7638                     if (sop
->type 
== REDIS_SORT_GET
) { 
7639                         if (!val
) val 
= createStringObject("",0); 
7641                         /* listTypePush does an incrRefCount, so we should take care 
7642                          * of the incremented refcount caused by either 
7643                          * lookupKeyByPattern or createStringObject("",0) */ 
7644                         listTypePush(sobj
,val
,REDIS_TAIL
); 
7648                         redisAssert(sop
->type 
== REDIS_SORT_GET
); 
7653         dbReplace(c
->db
,storekey
,sobj
); 
7654         /* Note: we add 1 because the DB is dirty anyway since even if the 
7655          * SORT result is empty a new key is set and maybe the old content 
7657         server
.dirty 
+= 1+outputlen
; 
7658         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
7662     if (sortval
->type 
== REDIS_LIST
) 
7663         for (j 
= 0; j 
< vectorlen
; j
++) 
7664             decrRefCount(vector
[j
].obj
); 
7665     decrRefCount(sortval
); 
7666     listRelease(operations
); 
7667     for (j 
= 0; j 
< vectorlen
; j
++) { 
7668         if (alpha 
&& vector
[j
].u
.cmpobj
) 
7669             decrRefCount(vector
[j
].u
.cmpobj
); 
7674 /* Convert an amount of bytes into a human readable string in the form 
7675  * of 100B, 2G, 100M, 4K, and so forth. */ 
7676 static void bytesToHuman(char *s
, unsigned long long n
) { 
7681         sprintf(s
,"%lluB",n
); 
7683     } else if (n 
< (1024*1024)) { 
7684         d 
= (double)n
/(1024); 
7685         sprintf(s
,"%.2fK",d
); 
7686     } else if (n 
< (1024LL*1024*1024)) { 
7687         d 
= (double)n
/(1024*1024); 
7688         sprintf(s
,"%.2fM",d
); 
7689     } else if (n 
< (1024LL*1024*1024*1024)) { 
7690         d 
= (double)n
/(1024LL*1024*1024); 
7691         sprintf(s
,"%.2fG",d
); 
7695 /* Create the string returned by the INFO command. This is decoupled 
7696  * from the INFO command itself as we need to report the same information 
7697  * on memory corruption problems. */ 
7698 static sds 
genRedisInfoString(void) { 
7700     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
7704     bytesToHuman(hmem
,zmalloc_used_memory()); 
7705     info 
= sdscatprintf(sdsempty(), 
7706         "redis_version:%s\r\n" 
7707         "redis_git_sha1:%s\r\n" 
7708         "redis_git_dirty:%d\r\n" 
7710         "multiplexing_api:%s\r\n" 
7711         "process_id:%ld\r\n" 
7712         "uptime_in_seconds:%ld\r\n" 
7713         "uptime_in_days:%ld\r\n" 
7714         "connected_clients:%d\r\n" 
7715         "connected_slaves:%d\r\n" 
7716         "blocked_clients:%d\r\n" 
7717         "used_memory:%zu\r\n" 
7718         "used_memory_human:%s\r\n" 
7719         "changes_since_last_save:%lld\r\n" 
7720         "bgsave_in_progress:%d\r\n" 
7721         "last_save_time:%ld\r\n" 
7722         "bgrewriteaof_in_progress:%d\r\n" 
7723         "total_connections_received:%lld\r\n" 
7724         "total_commands_processed:%lld\r\n" 
7725         "expired_keys:%lld\r\n" 
7726         "hash_max_zipmap_entries:%zu\r\n" 
7727         "hash_max_zipmap_value:%zu\r\n" 
7728         "pubsub_channels:%ld\r\n" 
7729         "pubsub_patterns:%u\r\n" 
7734         strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0, 
7735         (sizeof(long) == 8) ? "64" : "32", 
7740         listLength(server
.clients
)-listLength(server
.slaves
), 
7741         listLength(server
.slaves
), 
7742         server
.blpop_blocked_clients
, 
7743         zmalloc_used_memory(), 
7746         server
.bgsavechildpid 
!= -1, 
7748         server
.bgrewritechildpid 
!= -1, 
7749         server
.stat_numconnections
, 
7750         server
.stat_numcommands
, 
7751         server
.stat_expiredkeys
, 
7752         server
.hash_max_zipmap_entries
, 
7753         server
.hash_max_zipmap_value
, 
7754         dictSize(server
.pubsub_channels
), 
7755         listLength(server
.pubsub_patterns
), 
7756         server
.vm_enabled 
!= 0, 
7757         server
.masterhost 
== NULL 
? "master" : "slave" 
7759     if (server
.masterhost
) { 
7760         info 
= sdscatprintf(info
, 
7761             "master_host:%s\r\n" 
7762             "master_port:%d\r\n" 
7763             "master_link_status:%s\r\n" 
7764             "master_last_io_seconds_ago:%d\r\n" 
7767             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
7769             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
7772     if (server
.vm_enabled
) { 
7774         info 
= sdscatprintf(info
, 
7775             "vm_conf_max_memory:%llu\r\n" 
7776             "vm_conf_page_size:%llu\r\n" 
7777             "vm_conf_pages:%llu\r\n" 
7778             "vm_stats_used_pages:%llu\r\n" 
7779             "vm_stats_swapped_objects:%llu\r\n" 
7780             "vm_stats_swappin_count:%llu\r\n" 
7781             "vm_stats_swappout_count:%llu\r\n" 
7782             "vm_stats_io_newjobs_len:%lu\r\n" 
7783             "vm_stats_io_processing_len:%lu\r\n" 
7784             "vm_stats_io_processed_len:%lu\r\n" 
7785             "vm_stats_io_active_threads:%lu\r\n" 
7786             "vm_stats_blocked_clients:%lu\r\n" 
7787             ,(unsigned long long) server
.vm_max_memory
, 
7788             (unsigned long long) server
.vm_page_size
, 
7789             (unsigned long long) server
.vm_pages
, 
7790             (unsigned long long) server
.vm_stats_used_pages
, 
7791             (unsigned long long) server
.vm_stats_swapped_objects
, 
7792             (unsigned long long) server
.vm_stats_swapins
, 
7793             (unsigned long long) server
.vm_stats_swapouts
, 
7794             (unsigned long) listLength(server
.io_newjobs
), 
7795             (unsigned long) listLength(server
.io_processing
), 
7796             (unsigned long) listLength(server
.io_processed
), 
7797             (unsigned long) server
.io_active_threads
, 
7798             (unsigned long) server
.vm_blocked_clients
 
7802     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7803         long long keys
, vkeys
; 
7805         keys 
= dictSize(server
.db
[j
].dict
); 
7806         vkeys 
= dictSize(server
.db
[j
].expires
); 
7807         if (keys 
|| vkeys
) { 
7808             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
7815 static void infoCommand(redisClient 
*c
) { 
7816     sds info 
= genRedisInfoString(); 
7817     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
7818         (unsigned long)sdslen(info
))); 
7819     addReplySds(c
,info
); 
7820     addReply(c
,shared
.crlf
); 
7823 static void monitorCommand(redisClient 
*c
) { 
7824     /* ignore MONITOR if already slave or in monitor mode */ 
7825     if (c
->flags 
& REDIS_SLAVE
) return; 
7827     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
7829     listAddNodeTail(server
.monitors
,c
); 
7830     addReply(c
,shared
.ok
); 
7833 /* ================================= Expire ================================= */ 
7834 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
7835     if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) { 
7842 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
7843     sds copy 
= sdsdup(key
->ptr
); 
7844     if (dictAdd(db
->expires
,copy
,(void*)when
) == DICT_ERR
) { 
7852 /* Return the expire time of the specified key, or -1 if no expire 
7853  * is associated with this key (i.e. the key is non volatile) */ 
7854 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
7857     /* No expire? return ASAP */ 
7858     if (dictSize(db
->expires
) == 0 || 
7859        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1; 
7861     return (time_t) dictGetEntryVal(de
); 
7864 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
7868     /* No expire? return ASAP */ 
7869     if (dictSize(db
->expires
) == 0 || 
7870        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0; 
7872     /* Lookup the expire */ 
7873     when 
= (time_t) dictGetEntryVal(de
); 
7874     if (time(NULL
) <= when
) return 0; 
7876     /* Delete the key */ 
7878     server
.stat_expiredkeys
++; 
7882 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
7885     /* No expire? return ASAP */ 
7886     if (dictSize(db
->expires
) == 0 || 
7887        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0; 
7889     /* Delete the key */ 
7891     server
.stat_expiredkeys
++; 
7892     dictDelete(db
->expires
,key
->ptr
); 
7893     return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
; 
7896 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*param
, long offset
) { 
7900     if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return; 
7904     de 
= dictFind(c
->db
->dict
,key
->ptr
); 
7906         addReply(c
,shared
.czero
); 
7910         if (dbDelete(c
->db
,key
)) server
.dirty
++; 
7911         addReply(c
, shared
.cone
); 
7914         time_t when 
= time(NULL
)+seconds
; 
7915         if (setExpire(c
->db
,key
,when
)) { 
7916             addReply(c
,shared
.cone
); 
7919             addReply(c
,shared
.czero
); 
7925 static void expireCommand(redisClient 
*c
) { 
7926     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0); 
7929 static void expireatCommand(redisClient 
*c
) { 
7930     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
)); 
7933 static void ttlCommand(redisClient 
*c
) { 
7937     expire 
= getExpire(c
->db
,c
->argv
[1]); 
7939         ttl 
= (int) (expire
-time(NULL
)); 
7940         if (ttl 
< 0) ttl 
= -1; 
7942     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
7945 /* ================================ MULTI/EXEC ============================== */ 
7947 /* Client state initialization for MULTI/EXEC */ 
7948 static void initClientMultiState(redisClient 
*c
) { 
7949     c
->mstate
.commands 
= NULL
; 
7950     c
->mstate
.count 
= 0; 
7953 /* Release all the resources associated with MULTI/EXEC state */ 
7954 static void freeClientMultiState(redisClient 
*c
) { 
7957     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7959         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
7961         for (i 
= 0; i 
< mc
->argc
; i
++) 
7962             decrRefCount(mc
->argv
[i
]); 
7965     zfree(c
->mstate
.commands
); 
7968 /* Add a new command into the MULTI commands queue */ 
7969 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
7973     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
7974             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
7975     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
7978     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
7979     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
7980     for (j 
= 0; j 
< c
->argc
; j
++) 
7981         incrRefCount(mc
->argv
[j
]); 
7985 static void multiCommand(redisClient 
*c
) { 
7986     if (c
->flags 
& REDIS_MULTI
) { 
7987         addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n")); 
7990     c
->flags 
|= REDIS_MULTI
; 
7991     addReply(c
,shared
.ok
); 
7994 static void discardCommand(redisClient 
*c
) { 
7995     if (!(c
->flags 
& REDIS_MULTI
)) { 
7996         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
8000     freeClientMultiState(c
); 
8001     initClientMultiState(c
); 
8002     c
->flags 
&= (~REDIS_MULTI
); 
8003     addReply(c
,shared
.ok
); 
8006 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand 
8007  * implementation for more information. */ 
8008 static void execCommandReplicateMulti(redisClient 
*c
) { 
8009     struct redisCommand 
*cmd
; 
8010     robj 
*multistring 
= createStringObject("MULTI",5); 
8012     cmd 
= lookupCommand("multi"); 
8013     if (server
.appendonly
) 
8014         feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1); 
8015     if (listLength(server
.slaves
)) 
8016         replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1); 
8017     decrRefCount(multistring
); 
8020 static void execCommand(redisClient 
*c
) { 
8025     if (!(c
->flags 
& REDIS_MULTI
)) { 
8026         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
8030     /* Check if we need to abort the EXEC if some WATCHed key was touched. 
8031      * A failed EXEC will return a multi bulk nil object. */ 
8032     if (c
->flags 
& REDIS_DIRTY_CAS
) { 
8033         freeClientMultiState(c
); 
8034         initClientMultiState(c
); 
8035         c
->flags 
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
); 
8037         addReply(c
,shared
.nullmultibulk
); 
8041     /* Replicate a MULTI request now that we are sure the block is executed. 
8042      * This way we'll deliver the MULTI/..../EXEC block as a whole and 
8043      * both the AOF and the replication link will have the same consistency 
8044      * and atomicity guarantees. */ 
8045     execCommandReplicateMulti(c
); 
8047     /* Exec all the queued commands */ 
8048     unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */ 
8049     orig_argv 
= c
->argv
; 
8050     orig_argc 
= c
->argc
; 
8051     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
8052     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
8053         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
8054         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
8055         call(c
,c
->mstate
.commands
[j
].cmd
); 
8057     c
->argv 
= orig_argv
; 
8058     c
->argc 
= orig_argc
; 
8059     freeClientMultiState(c
); 
8060     initClientMultiState(c
); 
8061     c
->flags 
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
); 
8062     /* Make sure the EXEC command is always replicated / AOF, since we 
8063      * always send the MULTI command (we can't know beforehand if the 
8064      * next operations will contain at least a modification to the DB). */ 
8068 /* =========================== Blocking Operations  ========================= */ 
8070 /* Currently Redis blocking operations support is limited to list POP ops, 
8071  * so the current implementation is not fully generic, but it is also not 
8072  * completely specific so it will not require a rewrite to support new 
8073  * kind of blocking operations in the future. 
8075  * Still it's important to note that list blocking operations can be already 
8076  * used as a notification mechanism in order to implement other blocking 
8077  * operations at application level, so there must be a very strong evidence 
8078  * of usefulness and generality before new blocking operations are implemented. 
8080  * This is how the current blocking POP works, we use BLPOP as example: 
8081  * - If the user calls BLPOP and the key exists and contains a non-empty list 
8082  *   then LPOP is called instead. So BLPOP is semantically the same as LPOP 
8083  *   if there is no need to block. 
8084  * - If instead BLPOP is called and the key does not exist or the list is 
8085  *   empty we need to block. In order to do so we remove the notification for 
8086  *   new data to read in the client socket (so that we'll not serve new 
8087  *   requests if the blocking request is not served). Also we put the client 
8088  *   in a dictionary (db->blocking_keys) mapping keys to a list of clients 
8089  *   blocking for these keys. 
8090  * - If a PUSH operation against a key with blocked clients waiting is 
8091  *   performed, we serve the first in the list: basically instead of pushing 
8092  *   the new element inside the list we return it to the (first / oldest) 
8093  *   blocking client, unblock the client, and remove it from the list. 
8095  * The above comment and the source code should be enough in order to understand 
8096  * the implementation and modify / fix it later. 
8099 /* Set a client in blocking mode for the specified key, with the specified 
8101 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
8106     c
->blocking_keys 
= zmalloc(sizeof(robj
*)*numkeys
); 
8107     c
->blocking_keys_num 
= numkeys
; 
8108     c
->blockingto 
= timeout
; 
8109     for (j 
= 0; j 
< numkeys
; j
++) { 
8110         /* Add the key in the client structure, to map clients -> keys */ 
8111         c
->blocking_keys
[j
] = keys
[j
]; 
8112         incrRefCount(keys
[j
]); 
8114         /* And in the other "side", to map keys -> clients */ 
8115         de 
= dictFind(c
->db
->blocking_keys
,keys
[j
]); 
8119             /* For every key we take a list of clients blocked for it */ 
8121             retval 
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
); 
8122             incrRefCount(keys
[j
]); 
8123             assert(retval 
== DICT_OK
); 
8125             l 
= dictGetEntryVal(de
); 
8127         listAddNodeTail(l
,c
); 
8129     /* Mark the client as a blocked client */ 
8130     c
->flags 
|= REDIS_BLOCKED
; 
8131     server
.blpop_blocked_clients
++; 
8134 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
8135 static void unblockClientWaitingData(redisClient 
*c
) { 
8140     assert(c
->blocking_keys 
!= NULL
); 
8141     /* The client may wait for multiple keys, so unblock it for every key. */ 
8142     for (j 
= 0; j 
< c
->blocking_keys_num
; j
++) { 
8143         /* Remove this client from the list of clients waiting for this key. */ 
8144         de 
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]); 
8146         l 
= dictGetEntryVal(de
); 
8147         listDelNode(l
,listSearchKey(l
,c
)); 
8148         /* If the list is empty we need to remove it to avoid wasting memory */ 
8149         if (listLength(l
) == 0) 
8150             dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]); 
8151         decrRefCount(c
->blocking_keys
[j
]); 
8153     /* Cleanup the client structure */ 
8154     zfree(c
->blocking_keys
); 
8155     c
->blocking_keys 
= NULL
; 
8156     c
->flags 
&= (~REDIS_BLOCKED
); 
8157     server
.blpop_blocked_clients
--; 
8158     /* We want to process data if there is some command waiting 
8159      * in the input buffer. Note that this is safe even if 
8160      * unblockClientWaitingData() gets called from freeClient() because 
8161      * freeClient() will be smart enough to call this function 
8162      * *after* c->querybuf was set to NULL. */ 
8163     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
8166 /* This should be called from any function PUSHing into lists. 
8167  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
8168  * 'ele' is the element pushed. 
8170  * If the function returns 0 there was no client waiting for a list push 
8173  * If the function returns 1 there was a client waiting for a list push 
8174  * against this key, the element was passed to this client thus it's not 
8175  * needed to actually add it to the list and the caller should return asap. */ 
8176 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
8177     struct dictEntry 
*de
; 
8178     redisClient 
*receiver
; 
8182     de 
= dictFind(c
->db
->blocking_keys
,key
); 
8183     if (de 
== NULL
) return 0; 
8184     l 
= dictGetEntryVal(de
); 
8187     receiver 
= ln
->value
; 
8189     addReplySds(receiver
,sdsnew("*2\r\n")); 
8190     addReplyBulk(receiver
,key
); 
8191     addReplyBulk(receiver
,ele
); 
8192     unblockClientWaitingData(receiver
); 
8196 /* Blocking RPOP/LPOP */ 
8197 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
8202     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
8203         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
8205             if (o
->type 
!= REDIS_LIST
) { 
8206                 addReply(c
,shared
.wrongtypeerr
); 
8209                 list 
*list 
= o
->ptr
; 
8210                 if (listLength(list
) != 0) { 
8211                     /* If the list contains elements fall back to the usual 
8212                      * non-blocking POP operation */ 
8213                     robj 
*argv
[2], **orig_argv
; 
8216                     /* We need to alter the command arguments before calling 
8217                      * popGenericCommand() as the command takes a single key. */ 
8218                     orig_argv 
= c
->argv
; 
8219                     orig_argc 
= c
->argc
; 
8220                     argv
[1] = c
->argv
[j
]; 
8224                     /* Also the return value is different, we need to output 
8225                      * the multi bulk reply header and the key name. The 
8226                      * "real" command will add the last element (the value) 
8227                      * for us. If this sounds like a hack to you it's just 
8228                      * because it is... */ 
8229                     addReplySds(c
,sdsnew("*2\r\n")); 
8230                     addReplyBulk(c
,argv
[1]); 
8231                     popGenericCommand(c
,where
); 
8233                     /* Fix the client structure with the original stuff */ 
8234                     c
->argv 
= orig_argv
; 
8235                     c
->argc 
= orig_argc
; 
8241     /* If the list is empty or the key does not exist we must block */ 
8242     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
8243     if (timeout 
> 0) timeout 
+= time(NULL
); 
8244     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
8247 static void blpopCommand(redisClient 
*c
) { 
8248     blockingPopGenericCommand(c
,REDIS_HEAD
); 
8251 static void brpopCommand(redisClient 
*c
) { 
8252     blockingPopGenericCommand(c
,REDIS_TAIL
); 
8255 /* =============================== Replication  ============================= */ 
8257 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8258     ssize_t nwritten
, ret 
= size
; 
8259     time_t start 
= time(NULL
); 
8263         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
8264             nwritten 
= write(fd
,ptr
,size
); 
8265             if (nwritten 
== -1) return -1; 
8269         if ((time(NULL
)-start
) > timeout
) { 
8277 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8278     ssize_t nread
, totread 
= 0; 
8279     time_t start 
= time(NULL
); 
8283         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
8284             nread 
= read(fd
,ptr
,size
); 
8285             if (nread 
== -1) return -1; 
8290         if ((time(NULL
)-start
) > timeout
) { 
8298 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8305         if (syncRead(fd
,&c
,1,timeout
) == -1) return -1; 
8308             if (nread 
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0'; 
8319 static void syncCommand(redisClient 
*c
) { 
8320     /* ignore SYNC if already slave or in monitor mode */ 
8321     if (c
->flags 
& REDIS_SLAVE
) return; 
8323     /* SYNC can't be issued when the server has pending data to send to 
8324      * the client about already issued commands. We need a fresh reply 
8325      * buffer registering the differences between the BGSAVE and the current 
8326      * dataset, so that we can copy to other slaves if needed. */ 
8327     if (listLength(c
->reply
) != 0) { 
8328         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
8332     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
8333     /* Here we need to check if there is a background saving operation 
8334      * in progress, or if it is required to start one */ 
8335     if (server
.bgsavechildpid 
!= -1) { 
8336         /* Ok a background save is in progress. Let's check if it is a good 
8337          * one for replication, i.e. if there is another slave that is 
8338          * registering differences since the server forked to save */ 
8343         listRewind(server
.slaves
,&li
); 
8344         while((ln 
= listNext(&li
))) { 
8346             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
8349             /* Perfect, the server is already registering differences for 
8350              * another slave. Set the right state, and copy the buffer. */ 
8351             listRelease(c
->reply
); 
8352             c
->reply 
= listDup(slave
->reply
); 
8353             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8354             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
8356             /* No way, we need to wait for the next BGSAVE in order to 
8357              * register differences */ 
8358             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8359             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
8362         /* Ok we don't have a BGSAVE in progress, let's start one */ 
8363         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
8364         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
8365             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
8366             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
8369         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8372     c
->flags 
|= REDIS_SLAVE
; 
8374     listAddNodeTail(server
.slaves
,c
); 
8378 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
8379     redisClient 
*slave 
= privdata
; 
8381     REDIS_NOTUSED(mask
); 
8382     char buf
[REDIS_IOBUF_LEN
]; 
8383     ssize_t nwritten
, buflen
; 
8385     if (slave
->repldboff 
== 0) { 
8386         /* Write the bulk write count before transferring the DB. In theory here 
8387          * we don't know how much room there is in the output buffer of the 
8388          * socket, but in practice SO_SNDLOWAT (the minimum count for output 
8389          * operations) will never be smaller than the few bytes we need. */ 
8392         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
8394         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
8402     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
8403     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
8405         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
8406             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
8410     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
8411         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
8416     slave
->repldboff 
+= nwritten
; 
8417     if (slave
->repldboff 
== slave
->repldbsize
) { 
8418         close(slave
->repldbfd
); 
8419         slave
->repldbfd 
= -1; 
8420         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
8421         slave
->replstate 
= REDIS_REPL_ONLINE
; 
8422         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
8423             sendReplyToClient
, slave
) == AE_ERR
) { 
8427         addReplySds(slave
,sdsempty()); 
8428         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
8432 /* This function is called at the end of every background saving. 
8433  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
8434  * otherwise REDIS_ERR is passed to the function. 
8436  * The goal of this function is to handle slaves waiting for a successful 
8437  * background saving in order to perform non-blocking synchronization. */ 
8438 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
8440     int startbgsave 
= 0; 
8443     listRewind(server
.slaves
,&li
); 
8444     while((ln 
= listNext(&li
))) { 
8445         redisClient 
*slave 
= ln
->value
; 
8447         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
8449             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8450         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
8451             struct redis_stat buf
; 
8453             if (bgsaveerr 
!= REDIS_OK
) { 
8455                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
8458             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
8459                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
8461                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
8464             slave
->repldboff 
= 0; 
8465             slave
->repldbsize 
= buf
.st_size
; 
8466             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
8467             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
8468             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
8475         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
8478             listRewind(server
.slaves
,&li
); 
8479             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
8480             while((ln 
= listNext(&li
))) { 
8481                 redisClient 
*slave 
= ln
->value
; 
8483                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
8490 static int syncWithMaster(void) { 
8491     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
8493     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
8494     int dfd
, maxtries 
= 5; 
8497         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
8502     /* AUTH with the master if required. */ 
8503     if(server
.masterauth
) { 
8504         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
8505         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
8507             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
8511         /* Read the AUTH result.  */ 
8512         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
8514             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
8518         if (buf
[0] != '+') { 
8520             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
8525     /* Issue the SYNC command */ 
8526     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
8528         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
8532     /* Read the bulk write count */ 
8533     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
8535         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
8539     if (buf
[0] != '$') { 
8541         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
8544     dumpsize 
= strtol(buf
+1,NULL
,10); 
8545     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
8546     /* Read the bulk write data on a temp file */ 
8548         snprintf(tmpfile
,256, 
8549             "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid()); 
8550         dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644); 
8551         if (dfd 
!= -1) break; 
8556         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
8560         int nread
, nwritten
; 
8562         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
8564             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
8570         nwritten 
= write(dfd
,buf
,nread
); 
8571         if (nwritten 
== -1) { 
8572             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
8580     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
8581         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
8587     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
8588         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
8592     server
.master 
= createClient(fd
); 
8593     server
.master
->flags 
|= REDIS_MASTER
; 
8594     server
.master
->authenticated 
= 1; 
8595     server
.replstate 
= REDIS_REPL_CONNECTED
; 
8599 static void slaveofCommand(redisClient 
*c
) { 
8600     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
8601         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
8602         if (server
.masterhost
) { 
8603             sdsfree(server
.masterhost
); 
8604             server
.masterhost 
= NULL
; 
8605             if (server
.master
) freeClient(server
.master
); 
8606             server
.replstate 
= REDIS_REPL_NONE
; 
8607             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
8610         sdsfree(server
.masterhost
); 
8611         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
8612         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
8613         if (server
.master
) freeClient(server
.master
); 
8614         server
.replstate 
= REDIS_REPL_CONNECT
; 
8615         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
8616             server
.masterhost
, server
.masterport
); 
8618     addReply(c
,shared
.ok
); 
8621 /* ============================ Maxmemory directive  ======================== */ 
8623 /* Try to free one object form the pre-allocated objects free list. 
8624  * This is useful under low mem conditions as by default we take 1 million 
8625  * free objects allocated. On success REDIS_OK is returned, otherwise 
8627 static int tryFreeOneObjectFromFreelist(void) { 
8630     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
8631     if (listLength(server
.objfreelist
)) { 
8632         listNode 
*head 
= listFirst(server
.objfreelist
); 
8633         o 
= listNodeValue(head
); 
8634         listDelNode(server
.objfreelist
,head
); 
8635         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8639         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8644 /* This function gets called when 'maxmemory' is set on the config file to limit 
8645  * the max memory used by the server, and we are out of memory. 
8646  * This function will try to, in order: 
8648  * - Free objects from the free list 
8649  * - Try to remove keys with an EXPIRE set 
8651  * It is not possible to free enough memory to reach used-memory < maxmemory 
8652  * the server will start refusing commands that will enlarge even more the 
8655 static void freeMemoryIfNeeded(void) { 
8656     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
8657         int j
, k
, freed 
= 0; 
8659         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
8660         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8662             robj 
*minkey 
= NULL
; 
8663             struct dictEntry 
*de
; 
8665             if (dictSize(server
.db
[j
].expires
)) { 
8667                 /* From a sample of three keys drop the one nearest to 
8668                  * the natural expire */ 
8669                 for (k 
= 0; k 
< 3; k
++) { 
8672                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
8673                     t 
= (time_t) dictGetEntryVal(de
); 
8674                     if (minttl 
== -1 || t 
< minttl
) { 
8675                         minkey 
= dictGetEntryKey(de
); 
8679                 dbDelete(server
.db
+j
,minkey
); 
8682         if (!freed
) return; /* nothing to free... */ 
8686 /* ============================== Append Only file ========================== */ 
8688 /* Called when the user switches from "appendonly yes" to "appendonly no" 
8689  * at runtime using the CONFIG command. */ 
8690 static void stopAppendOnly(void) { 
8691     flushAppendOnlyFile(); 
8692     aof_fsync(server
.appendfd
); 
8693     close(server
.appendfd
); 
8695     server
.appendfd 
= -1; 
8696     server
.appendseldb 
= -1; 
8697     server
.appendonly 
= 0; 
8698     /* rewrite operation in progress? kill it, wait child exit */ 
8699     if (server
.bgsavechildpid 
!= -1) { 
8702         if (kill(server
.bgsavechildpid
,SIGKILL
) != -1) 
8703             wait3(&statloc
,0,NULL
); 
8704         /* reset the buffer accumulating changes while the child saves */ 
8705         sdsfree(server
.bgrewritebuf
); 
8706         server
.bgrewritebuf 
= sdsempty(); 
8707         server
.bgsavechildpid 
= -1; 
8711 /* Called when the user switches from "appendonly no" to "appendonly yes" 
8712  * at runtime using the CONFIG command. */ 
8713 static int startAppendOnly(void) { 
8714     server
.appendonly 
= 1; 
8715     server
.lastfsync 
= time(NULL
); 
8716     server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
8717     if (server
.appendfd 
== -1) { 
8718         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
)); 
8721     if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) { 
8722         server
.appendonly 
= 0; 
8723         close(server
.appendfd
); 
8724         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
)); 
8730 /* Write the append only file buffer on disk. 
8732  * Since we are required to write the AOF before replying to the client, 
8733  * and the only way the client socket can get a write is entering when the 
8734  * the event loop, we accumulate all the AOF writes in a memory 
8735  * buffer and write it on disk using this function just before entering 
8736  * the event loop again. */ 
8737 static void flushAppendOnlyFile(void) { 
8741     if (sdslen(server
.aofbuf
) == 0) return; 
8743     /* We want to perform a single write. This should be guaranteed atomic 
8744      * at least if the filesystem we are writing is a real physical one. 
8745      * While this will save us against the server being killed I don't think 
8746      * there is much to do about the whole server stopping for power problems 
8748      nwritten 
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
)); 
8749      if (nwritten 
!= (signed)sdslen(server
.aofbuf
)) { 
8750         /* Ooops, we are in troubles. The best thing to do for now is 
8751          * aborting instead of giving the illusion that everything is 
8752          * working as expected. */ 
8753          if (nwritten 
== -1) { 
8754             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
8756             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
8760     sdsfree(server
.aofbuf
); 
8761     server
.aofbuf 
= sdsempty(); 
8763     /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have 
8764      * childs performing heavy I/O on disk. */ 
8765     if (server
.no_appendfsync_on_rewrite 
&& 
8766         (server
.bgrewritechildpid 
!= -1 || server
.bgsavechildpid 
!= -1)) 
8768     /* Fsync if needed */ 
8770     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
8771         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
8772          now
-server
.lastfsync 
> 1)) 
8774         /* aof_fsync is defined as fdatasync() for Linux in order to avoid 
8775          * flushing metadata. */ 
8776         aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
8777         server
.lastfsync 
= now
; 
8781 static sds 
catAppendOnlyGenericCommand(sds buf
, int argc
, robj 
**argv
) { 
8783     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
8784     for (j 
= 0; j 
< argc
; j
++) { 
8785         robj 
*o 
= getDecodedObject(argv
[j
]); 
8786         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
8787         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
8788         buf 
= sdscatlen(buf
,"\r\n",2); 
8794 static sds 
catAppendOnlyExpireAtCommand(sds buf
, robj 
*key
, robj 
*seconds
) { 
8799     /* Make sure we can use strtol */ 
8800     seconds 
= getDecodedObject(seconds
); 
8801     when 
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10); 
8802     decrRefCount(seconds
); 
8804     argv
[0] = createStringObject("EXPIREAT",8); 
8806     argv
[2] = createObject(REDIS_STRING
, 
8807         sdscatprintf(sdsempty(),"%ld",when
)); 
8808     buf 
= catAppendOnlyGenericCommand(buf
, argc
, argv
); 
8809     decrRefCount(argv
[0]); 
8810     decrRefCount(argv
[2]); 
8814 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
8815     sds buf 
= sdsempty(); 
8818     /* The DB this command was targetting is not the same as the last command 
8819      * we appendend. To issue a SELECT command is needed. */ 
8820     if (dictid 
!= server
.appendseldb
) { 
8823         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
8824         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
8825             (unsigned long)strlen(seldb
),seldb
); 
8826         server
.appendseldb 
= dictid
; 
8829     if (cmd
->proc 
== expireCommand
) { 
8830         /* Translate EXPIRE into EXPIREAT */ 
8831         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8832     } else if (cmd
->proc 
== setexCommand
) { 
8833         /* Translate SETEX to SET and EXPIREAT */ 
8834         tmpargv
[0] = createStringObject("SET",3); 
8835         tmpargv
[1] = argv
[1]; 
8836         tmpargv
[2] = argv
[3]; 
8837         buf 
= catAppendOnlyGenericCommand(buf
,3,tmpargv
); 
8838         decrRefCount(tmpargv
[0]); 
8839         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8841         buf 
= catAppendOnlyGenericCommand(buf
,argc
,argv
); 
8844     /* Append to the AOF buffer. This will be flushed on disk just before 
8845      * of re-entering the event loop, so before the client will get a 
8846      * positive reply about the operation performed. */ 
8847     server
.aofbuf 
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
)); 
8849     /* If a background append only file rewriting is in progress we want to 
8850      * accumulate the differences between the child DB and the current one 
8851      * in a buffer, so that when the child process will do its work we 
8852      * can append the differences to the new append only file. */ 
8853     if (server
.bgrewritechildpid 
!= -1) 
8854         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
8859 /* In Redis commands are always executed in the context of a client, so in 
8860  * order to load the append only file we need to create a fake client. */ 
8861 static struct redisClient 
*createFakeClient(void) { 
8862     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
8866     c
->querybuf 
= sdsempty(); 
8870     /* We set the fake client as a slave waiting for the synchronization 
8871      * so that Redis will not try to send replies to this client. */ 
8872     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8873     c
->reply 
= listCreate(); 
8874     listSetFreeMethod(c
->reply
,decrRefCount
); 
8875     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
8876     initClientMultiState(c
); 
8880 static void freeFakeClient(struct redisClient 
*c
) { 
8881     sdsfree(c
->querybuf
); 
8882     listRelease(c
->reply
); 
8883     freeClientMultiState(c
); 
8887 /* Replay the append log file. On error REDIS_OK is returned. On non fatal 
8888  * error (the append only file is zero-length) REDIS_ERR is returned. On 
8889  * fatal error an error message is logged and the program exists. */ 
8890 int loadAppendOnlyFile(char *filename
) { 
8891     struct redisClient 
*fakeClient
; 
8892     FILE *fp 
= fopen(filename
,"r"); 
8893     struct redis_stat sb
; 
8894     int appendonly 
= server
.appendonly
; 
8896     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
8900         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
8904     /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI 
8905      * to the same file we're about to read. */ 
8906     server
.appendonly 
= 0; 
8908     fakeClient 
= createFakeClient(); 
8915         struct redisCommand 
*cmd
; 
8918         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
8924         if (buf
[0] != '*') goto fmterr
; 
8926         argv 
= zmalloc(sizeof(robj
*)*argc
); 
8927         for (j 
= 0; j 
< argc
; j
++) { 
8928             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
8929             if (buf
[0] != '$') goto fmterr
; 
8930             len 
= strtol(buf
+1,NULL
,10); 
8931             argsds 
= sdsnewlen(NULL
,len
); 
8932             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
8933             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
8934             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
8937         /* Command lookup */ 
8938         cmd 
= lookupCommand(argv
[0]->ptr
); 
8940             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
8943         /* Try object encoding */ 
8944         if (cmd
->flags 
& REDIS_CMD_BULK
) 
8945             argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]); 
8946         /* Run the command in the context of a fake client */ 
8947         fakeClient
->argc 
= argc
; 
8948         fakeClient
->argv 
= argv
; 
8949         cmd
->proc(fakeClient
); 
8950         /* Discard the reply objects list from the fake client */ 
8951         while(listLength(fakeClient
->reply
)) 
8952             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
8953         /* Clean up, ready for the next command */ 
8954         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
8956         /* Handle swapping while loading big datasets when VM is on */ 
8958         if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32) 
8961         if (server
.vm_enabled 
&& force_swapout
) { 
8962             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
8963                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
8968     /* This point can only be reached when EOF is reached without errors. 
8969      * If the client is in the middle of a MULTI/EXEC, log error and quit. */ 
8970     if (fakeClient
->flags 
& REDIS_MULTI
) goto readerr
; 
8973     freeFakeClient(fakeClient
); 
8974     server
.appendonly 
= appendonly
; 
8979         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
8981         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
8985     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 if any of the writes fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char header[128];
    int hlen;

    /* Build the "$<count>\r\n" prefix by hand. */
    header[0] = '$';
    hlen = 1+ll2string(header+1,sizeof(header)-1,len);
    header[hlen++] = '\r';
    header[hlen++] = '\n';

    /* The payload write is skipped for empty strings, since fwrite() of
     * zero bytes returns 0 exactly like an error would. */
    if (fwrite(header,hlen,1,fp) == 0 ||
        (len > 0 && fwrite(s,len,1,fp) == 0) ||
        fwrite("\r\n",2,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the value formatted with "%.17g".
 * Returns 1 on success, 0 if a write fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload is formatted together with its trailing CRLF, so the
     * advertised count is its length minus those two bytes. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
/* Write a long long value in bulk format $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 if the write fails. */
static int fwriteBulkLongLong(FILE *fp, long long l) {
    char digits[128], out[128];
    unsigned int ndigits, outlen;

    ndigits = ll2string(digits,32,l);
    /* Header and payload are small enough to be emitted with one write. */
    outlen = snprintf(out,sizeof(out),"$%u\r\n%s\r\n",ndigits,digits);
    return (fwrite(out,outlen,1,fp) == 0) ? 0 : 1;
}
9025 /* Delegate writing an object to writing a bulk string or bulk long long. */ 
9026 static int fwriteBulkObject(FILE *fp
, robj 
*obj
) { 
9027     /* Avoid using getDecodedObject to help copy-on-write (we are often 
9028      * in a child process when this function is called). */ 
9029     if (obj
->encoding 
== REDIS_ENCODING_INT
) { 
9030         return fwriteBulkLongLong(fp
,(long)obj
->ptr
); 
9031     } else if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
9032         return fwriteBulkString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
9034         redisPanic("Unknown string encoding"); 
9038 /* Write a sequence of commands able to fully rebuild the dataset into 
9039  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
9040 static int rewriteAppendOnlyFile(char *filename
) { 
9041     dictIterator 
*di 
= NULL
; 
9046     time_t now 
= time(NULL
); 
9048     /* Note that we have to use a different temp name here compared to the 
9049      * one used by rewriteAppendOnlyFileBackground() function. */ 
9050     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
9051     fp 
= fopen(tmpfile
,"w"); 
9053         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
9056     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
9057         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
9058         redisDb 
*db 
= server
.db
+j
; 
9060         if (dictSize(d
) == 0) continue; 
9061         di 
= dictGetIterator(d
); 
9067         /* SELECT the new DB */ 
9068         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
9069         if (fwriteBulkLongLong(fp
,j
) == 0) goto werr
; 
9071         /* Iterate this DB writing every entry */ 
9072         while((de 
= dictNext(di
)) != NULL
) { 
9073             sds keystr 
= dictGetEntryKey(de
); 
9078             keystr 
= dictGetEntryKey(de
); 
9079             o 
= dictGetEntryVal(de
); 
9080             initStaticStringObject(key
,keystr
); 
9081             /* If the value for this key is swapped, load a preview in memory. 
9082              * We use a "swapped" flag to remember if we need to free the 
9083              * value object instead to just increment the ref count anyway 
9084              * in order to avoid copy-on-write of pages if we are forked() */ 
9085             if (!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY 
|| 
9086                 o
->storage 
== REDIS_VM_SWAPPING
) { 
9089                 o 
= vmPreviewObject(o
); 
9092             expiretime 
= getExpire(db
,&key
); 
9094             /* Save the key and associated value */ 
9095             if (o
->type 
== REDIS_STRING
) { 
9096                 /* Emit a SET command */ 
9097                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
9098                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9100                 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9101                 if (fwriteBulkObject(fp
,o
) == 0) goto werr
; 
9102             } else if (o
->type 
== REDIS_LIST
) { 
9103                 /* Emit the RPUSHes needed to rebuild the list */ 
9104                 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
9105                 if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
9106                     unsigned char *zl 
= o
->ptr
; 
9107                     unsigned char *p 
= ziplistIndex(zl
,0); 
9108                     unsigned char *vstr
; 
9112                     while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
9113                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9114                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9116                             if (fwriteBulkString(fp
,(char*)vstr
,vlen
) == 0) 
9119                             if (fwriteBulkLongLong(fp
,vlong
) == 0) 
9122                         p 
= ziplistNext(zl
,p
); 
9124                 } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
9125                     list 
*list 
= o
->ptr
; 
9129                     listRewind(list
,&li
); 
9130                     while((ln 
= listNext(&li
))) { 
9131                         robj 
*eleobj 
= listNodeValue(ln
); 
9133                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9134                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9135                         if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9138                     redisPanic("Unknown list encoding"); 
9140             } else if (o
->type 
== REDIS_SET
) { 
9141                 /* Emit the SADDs needed to rebuild the set */ 
9143                 dictIterator 
*di 
= dictGetIterator(set
); 
9146                 while((de 
= dictNext(di
)) != NULL
) { 
9147                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
9148                     robj 
*eleobj 
= dictGetEntryKey(de
); 
9150                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9151                     if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9152                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9154                 dictReleaseIterator(di
); 
9155             } else if (o
->type 
== REDIS_ZSET
) { 
9156                 /* Emit the ZADDs needed to rebuild the sorted set */ 
9158                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
9161                 while((de 
= dictNext(di
)) != NULL
) { 
9162                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
9163                     robj 
*eleobj 
= dictGetEntryKey(de
); 
9164                     double *score 
= dictGetEntryVal(de
); 
9166                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9167                     if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9168                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
9169                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9171                 dictReleaseIterator(di
); 
9172             } else if (o
->type 
== REDIS_HASH
) { 
9173                 char cmd
[]="*4\r\n$4\r\nHSET\r\n"; 
9175                 /* Emit the HSETs needed to rebuild the hash */ 
9176                 if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
9177                     unsigned char *p 
= zipmapRewind(o
->ptr
); 
9178                     unsigned char *field
, *val
; 
9179                     unsigned int flen
, vlen
; 
9181                     while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
9182                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9183                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9184                         if (fwriteBulkString(fp
,(char*)field
,flen
) == -1) 
9186                         if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1) 
9190                     dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
9193                     while((de 
= dictNext(di
)) != NULL
) { 
9194                         robj 
*field 
= dictGetEntryKey(de
); 
9195                         robj 
*val 
= dictGetEntryVal(de
); 
9197                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9198                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9199                         if (fwriteBulkObject(fp
,field
) == -1) return -1; 
9200                         if (fwriteBulkObject(fp
,val
) == -1) return -1; 
9202                     dictReleaseIterator(di
); 
9205                 redisPanic("Unknown object type"); 
9207             /* Save the expire time */ 
9208             if (expiretime 
!= -1) { 
9209                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
9210                 /* If this key is already expired skip it */ 
9211                 if (expiretime 
< now
) continue; 
9212                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9213                 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9214                 if (fwriteBulkLongLong(fp
,expiretime
) == 0) goto werr
; 
9216             if (swapped
) decrRefCount(o
); 
9218         dictReleaseIterator(di
); 
9221     /* Make sure data will not remain on the OS's output buffers */ 
9223     aof_fsync(fileno(fp
)); 
9226     /* Use RENAME to make sure the DB file is changed atomically only 
9227      * if the generate DB file is ok. */ 
9228     if (rename(tmpfile
,filename
) == -1) { 
9229         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
9233     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
9239     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
9240     if (di
) dictReleaseIterator(di
); 
9244 /* This is how rewriting of the append only file in background works: 
9246  * 1) The user calls BGREWRITEAOF 
9247  * 2) Redis calls this function, that forks(): 
9248  *    2a) the child rewrite the append only file in a temp file. 
9249  *    2b) the parent accumulates differences in server.bgrewritebuf. 
9250  * 3) When the child finished '2a' exists. 
9251  * 4) The parent will trap the exit code, if it's OK, will append the 
9252  *    data accumulated into server.bgrewritebuf into the temp file, and 
9253  *    finally will rename(2) the temp file in the actual file name. 
9254  *    The the new file is reopened as the new append only file. Profit! 
9256 static int rewriteAppendOnlyFileBackground(void) { 
9259     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
9260     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
9261     if ((childpid 
= fork()) == 0) { 
9265         if (server
.vm_enabled
) vmReopenSwapFile(); 
9267         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
9268         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
9275         if (childpid 
== -1) { 
9276             redisLog(REDIS_WARNING
, 
9277                 "Can't rewrite append only file in background: fork: %s", 
9281         redisLog(REDIS_NOTICE
, 
9282             "Background append only file rewriting started by pid %d",childpid
); 
9283         server
.bgrewritechildpid 
= childpid
; 
9284         updateDictResizePolicy(); 
9285         /* We set appendseldb to -1 in order to force the next call to the 
9286          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
9287          * accumulated by the parent into server.bgrewritebuf will start 
9288          * with a SELECT statement and it will be safe to merge. */ 
9289         server
.appendseldb 
= -1; 
9292     return REDIS_OK
; /* unreached */ 
9295 static void bgrewriteaofCommand(redisClient 
*c
) { 
9296     if (server
.bgrewritechildpid 
!= -1) { 
9297         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
9300     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
9301         char *status 
= "+Background append only file rewriting started\r\n"; 
9302         addReplySds(c
,sdsnew(status
)); 
9304         addReply(c
,shared
.err
); 
/* Remove the temp file used by the background AOF rewrite child with the
 * given pid. The file name must match the one built in
 * rewriteAppendOnlyFileBackground(). The unlink() result is ignored. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
9315 /* Virtual Memory is composed mainly of two subsystems: 
9316  * - Blocking Virtual Memory 
9317  * - Threaded Virtual Memory I/O 
9318  * The two parts are not fully decoupled, but functions are split among two 
9319  * different sections of the source code (delimited by comments) in order to 
9320  * make more clear what functionality is about the blocking VM and what about 
9321  * the threaded (not blocking) VM. 
9325  * Redis VM is a blocking VM (one that blocks reading swapped values from 
9326  * disk into memory when a value swapped out is needed in memory) that is made 
9327  * unblocking by trying to examine the command argument vector in order to 
9328  * load in background values that will likely be needed in order to exec 
9329  * the command. The command is executed only once all the relevant keys 
9330  * are loaded into memory. 
9332  * This is basically almost as simple as a blocking VM, but almost as parallel 
9333  * as a fully non-blocking VM. 
9336 /* =================== Virtual Memory - Blocking Side  ====================== */ 
9338 /* Create a VM pointer object. Objects of this kind are used in place of 
9339  * values in the key -> value hash table, for swapped out objects. */ 
9340 static vmpointer 
*createVmPointer(int vtype
) { 
9341     vmpointer 
*vp 
= zmalloc(sizeof(vmpointer
)); 
9343     vp
->type 
= REDIS_VMPOINTER
; 
9344     vp
->storage 
= REDIS_VM_SWAPPED
; 
9349 static void vmInit(void) { 
9355     if (server
.vm_max_threads 
!= 0) 
9356         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
9358     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
9359     /* Try to open the old swap file, otherwise create it */ 
9360     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
9361         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
9363     if (server
.vm_fp 
== NULL
) { 
9364         redisLog(REDIS_WARNING
, 
9365             "Can't open the swap file: %s. Exiting.", 
9369     server
.vm_fd 
= fileno(server
.vm_fp
); 
9370     /* Lock the swap file for writing, this is useful in order to prevent 
9371      * another instance from using the same swap file because of a config error. */ 
9372     fl
.l_type 
= F_WRLCK
; 
9373     fl
.l_whence 
= SEEK_SET
; 
9374     fl
.l_start 
= fl
.l_len 
= 0; 
9375     if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) { 
9376         redisLog(REDIS_WARNING
, 
9377             "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
)); 
9381     server
.vm_next_page 
= 0; 
9382     server
.vm_near_pages 
= 0; 
9383     server
.vm_stats_used_pages 
= 0; 
9384     server
.vm_stats_swapped_objects 
= 0; 
9385     server
.vm_stats_swapouts 
= 0; 
9386     server
.vm_stats_swapins 
= 0; 
9387     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
9388     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
9389     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
9390         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
9394         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
9396     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
9397     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
9398         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
9399     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
9401     /* Initialize threaded I/O (used by Virtual Memory) */ 
9402     server
.io_newjobs 
= listCreate(); 
9403     server
.io_processing 
= listCreate(); 
9404     server
.io_processed 
= listCreate(); 
9405     server
.io_ready_clients 
= listCreate(); 
9406     pthread_mutex_init(&server
.io_mutex
,NULL
); 
9407     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
9408     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
9409     server
.io_active_threads 
= 0; 
9410     if (pipe(pipefds
) == -1) { 
9411         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
9415     server
.io_ready_pipe_read 
= pipefds
[0]; 
9416     server
.io_ready_pipe_write 
= pipefds
[1]; 
9417     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
9418     /* LZF requires a lot of stack */ 
9419     pthread_attr_init(&server
.io_threads_attr
); 
9420     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
9421     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
9422     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
9423     /* Listen for events in the threaded I/O pipe */ 
9424     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
9425         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
9426         oom("creating file event"); 
9429 /* Mark the page as used */ 
9430 static void vmMarkPageUsed(off_t page
) { 
9431     off_t byte 
= page
/8; 
9433     redisAssert(vmFreePage(page
) == 1); 
9434     server
.vm_bitmap
[byte
] |= 1<<bit
; 
9437 /* Mark N contiguous pages as used, with 'page' being the first. */ 
9438 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
9441     for (j 
= 0; j 
< count
; j
++) 
9442         vmMarkPageUsed(page
+j
); 
9443     server
.vm_stats_used_pages 
+= count
; 
9444     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
9445         (long long)count
, (long long)page
); 
9448 /* Mark the page as free */ 
9449 static void vmMarkPageFree(off_t page
) { 
9450     off_t byte 
= page
/8; 
9452     redisAssert(vmFreePage(page
) == 0); 
9453     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
9456 /* Mark N contiguous pages as free, with 'page' being the first. */ 
9457 static void vmMarkPagesFree(off_t page
, off_t count
) { 
9460     for (j 
= 0; j 
< count
; j
++) 
9461         vmMarkPageFree(page
+j
); 
9462     server
.vm_stats_used_pages 
-= count
; 
9463     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
9464         (long long)count
, (long long)page
); 
9467 /* Test if the page is free */ 
9468 static int vmFreePage(off_t page
) { 
9469     off_t byte 
= page
/8; 
9471     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
9474 /* Find N contiguous free pages storing the first page of the cluster in *first. 
9475  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise 
9476  * REDIS_ERR is returned. 
9478  * This function uses a simple algorithm: we try to allocate 
9479  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
9480  * again from the start of the swap file searching for free spaces. 
9482  * If it looks pretty clear that there are no free pages near our offset 
9483  * we try to find less populated places doing a forward jump of 
9484  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
9485  * without hurry, and then we jump again and so forth... 
9487  * This function can be improved using a free list to avoid guessing 
9488  * too much, since we could collect data about freed pages. 
9490  * note: I implemented this function just after watching an episode of 
9491  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
9493 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
9494     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
9496     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
9497         server
.vm_near_pages 
= 0; 
9498         server
.vm_next_page 
= 0; 
9500     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
9501     base 
= server
.vm_next_page
; 
9503     while(offset 
< server
.vm_pages
) { 
9504         off_t 
this = base
+offset
; 
9506         /* If we overflow, restart from page zero */ 
9507         if (this >= server
.vm_pages
) { 
9508             this -= server
.vm_pages
; 
9510                 /* Just overflowed, what we found on tail is no longer 
9511                  * interesting, as it's no longer contiguous. */ 
9515         if (vmFreePage(this)) { 
9516             /* This is a free page */ 
9518             /* Already got N free pages? Return to the caller, with success */ 
9520                 *first 
= this-(n
-1); 
9521                 server
.vm_next_page 
= this+1; 
9522                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
9526             /* The current one is not a free page */ 
9530         /* Fast-forward if the current page is not free and we already 
9531          * searched enough near this place. */ 
9533         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
9534             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
9536             /* Note that even if we rewind after the jump, we don't need 
9537              * to make sure numfree is set to zero as we only jump *if* it 
9538              * is set to zero. */ 
9540             /* Otherwise just check the next page */ 
9547 /* Write the specified object at the specified page of the swap file */ 
9548 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
9549     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
9550     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
9551         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9552         redisLog(REDIS_WARNING
, 
9553             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
9557     rdbSaveObject(server
.vm_fp
,o
); 
9558     fflush(server
.vm_fp
); 
9559     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9563 /* Transfers the 'val' object to disk. A 'vmpointer' object containing 
9564  * all the information needed to load the object back later is 
9565  * returned. 
9567  * If we can't find enough contiguous empty pages to swap the object on disk 
9568  * NULL is returned. */ 
9569 static vmpointer 
*vmSwapObjectBlocking(robj 
*val
) { 
9570     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
9574     assert(val
->storage 
== REDIS_VM_MEMORY
); 
9575     assert(val
->refcount 
== 1); 
9576     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
; 
9577     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
; 
9579     vp 
= createVmPointer(val
->type
); 
9581     vp
->usedpages 
= pages
; 
9582     decrRefCount(val
); /* Deallocate the object from memory. */ 
9583     vmMarkPagesUsed(page
,pages
); 
9584     redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)", 
9586         (unsigned long long) page
, (unsigned long long) pages
); 
9587     server
.vm_stats_swapped_objects
++; 
9588     server
.vm_stats_swapouts
++; 
9592 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
9595     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
9596     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
9597         redisLog(REDIS_WARNING
, 
9598             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
9602     o 
= rdbLoadObject(type
,server
.vm_fp
); 
9604         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
9607     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9611 /* Load the specified object from swap to memory. 
9612  * The newly allocated object is returned. 
9614  * If preview is true the unserialized object is returned to the caller but 
9615  * the pages are not marked as freed, nor the vp object is freed. */ 
9616 static robj 
*vmGenericLoadObject(vmpointer 
*vp
, int preview
) { 
9619     redisAssert(vp
->type 
== REDIS_VMPOINTER 
&& 
9620         (vp
->storage 
== REDIS_VM_SWAPPED 
|| vp
->storage 
== REDIS_VM_LOADING
)); 
9621     val 
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
); 
9623         redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
); 
9624         vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
9626         server
.vm_stats_swapped_objects
--; 
9628         redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
); 
9630     server
.vm_stats_swapins
++; 
9634 /* Plain object loading, from swap to memory. 
9636  * 'o' is actually a vmpointer structure that will be freed by the call. 
9637  * The return value is the loaded object. 
 * The load itself is delegated to vmGenericLoadObject() with preview = 0. */ 
9638 static robj 
*vmLoadObject(robj 
*o
) { 
9639     /* If we are loading the object in background, stop it, we 
9640      * need to load this object synchronously ASAP. */ 
9641     if (o
->storage 
== REDIS_VM_LOADING
) 
9642         vmCancelThreadedIOJob(o
); 
9643     return vmGenericLoadObject((vmpointer
*)o
,0); 
9646 /* Just load the value from disk, without modifying the key. 
9647  * This is useful when we want to perform some operation on the value 
9648  * without really bringing it from swap to memory, like while saving the 
9649  * dataset or rewriting the append only log. 
 * The load itself is delegated to vmGenericLoadObject() with preview = 1. */ 
9650 static robj 
*vmPreviewObject(robj 
*o
) { 
9651     return vmGenericLoadObject((vmpointer
*)o
,1); 
9654 /* How good a candidate is this object for swapping? 
9655  * The better candidate it is, the greater the returned value. 
9657  * Currently we try to perform a fast estimation of the object size in 
9658  * memory, and combine it with aging information. 
9660  * Basically swappability = idle-time * log(estimated size) 
9662  * Bigger objects are preferred over smaller objects, but not 
9663  * proportionally, this is why we use the logarithm. This algorithm is 
9664  * just a first try and will probably be tuned later. */ 
9665 static double computeObjectSwappability(robj 
*o
) { 
9666     /* actual age can be >= minage, but not < minage. As we use wrapping 
9667      * 21 bit clocks with minutes resolution for the LRU. */ 
9668     time_t minage 
= abs(server
.lruclock 
- o
->lru
); 
9669     long asize 
= 0, elesize
; 
9674     struct dictEntry 
*de
; 
9677     if (minage 
<= 0) return 0; 
9680         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
9683             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
9687         if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
9688             asize 
= sizeof(*o
)+ziplistSize(o
->ptr
); 
9692             asize 
= sizeof(list
); 
9695                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9696                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9697                 asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
9703         z 
= (o
->type 
== REDIS_ZSET
); 
9704         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
9706         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9707         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
9709             de 
= dictGetRandomKey(d
); 
9710             ele 
= dictGetEntryKey(de
); 
9711             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9712                             (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9713             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9714             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
9718         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
9719             unsigned char *p 
= zipmapRewind((unsigned char*)o
->ptr
); 
9720             unsigned int len 
= zipmapLen((unsigned char*)o
->ptr
); 
9721             unsigned int klen
, vlen
; 
9722             unsigned char *key
, *val
; 
9724             if ((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) { 
9728             asize 
= len
*(klen
+vlen
+3); 
9729         } else if (o
->encoding 
== REDIS_ENCODING_HT
) { 
9731             asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9733                 de 
= dictGetRandomKey(d
); 
9734                 ele 
= dictGetEntryKey(de
); 
9735                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9736                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9737                 ele 
= dictGetEntryVal(de
); 
9738                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9739                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9740                 asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9745     return (double)minage
*log(1+asize
); 
9748 /* Try to swap an object that's a good candidate for swapping. 
9749  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
9750  * to swap any object at all. 
9752  * If 'usethreads' is true, Redis will try to swap the object in background 
9753  * using I/O threads. */ 
9754 static int vmSwapOneObject(int usethreads
) { 
9756     struct dictEntry 
*best 
= NULL
; 
9757     double best_swappability 
= 0; 
9758     redisDb 
*best_db 
= NULL
; 
9762     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
9763         redisDb 
*db 
= server
.db
+j
; 
9764         /* Why maxtries is set to 100? 
9765          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
9766          * are swappable objects */ 
9769         if (dictSize(db
->dict
) == 0) continue; 
9770         for (i 
= 0; i 
< 5; i
++) { 
9772             double swappability
; 
9774             if (maxtries
) maxtries
--; 
9775             de 
= dictGetRandomKey(db
->dict
); 
9776             val 
= dictGetEntryVal(de
); 
9777             /* Only swap objects that are currently in memory. 
9779              * Also don't swap shared objects: not a good idea in general and 
9780              * we need to ensure that the main thread does not touch the 
9781              * object while the I/O thread is using it, but we can't 
9782              * control other keys without adding additional mutex. */ 
9783             if (val
->storage 
!= REDIS_VM_MEMORY 
|| val
->refcount 
!= 1) { 
9784                 if (maxtries
) i
--; /* don't count this try */ 
9787             swappability 
= computeObjectSwappability(val
); 
9788             if (!best 
|| swappability 
> best_swappability
) { 
9790                 best_swappability 
= swappability
; 
9795     if (best 
== NULL
) return REDIS_ERR
; 
9796     key 
= dictGetEntryKey(best
); 
9797     val 
= dictGetEntryVal(best
); 
9799     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
9800         key
, best_swappability
); 
9804         robj 
*keyobj 
= createStringObject(key
,sdslen(key
)); 
9805         vmSwapObjectThreaded(keyobj
,val
,best_db
); 
9806         decrRefCount(keyobj
); 
9811         if ((vp 
= vmSwapObjectBlocking(val
)) != NULL
) { 
9812             dictGetEntryVal(best
) = vp
; 
/* Swap out one object without using the I/O threads: forwards to
 * vmSwapOneObject() with usethreads = 0 and returns its result unchanged. */
9820 static int vmSwapOneObjectBlocking() { 
9821     return vmSwapOneObject(0); 
/* Swap out one object using the I/O threads: forwards to
 * vmSwapOneObject() with usethreads = 1 and returns its result unchanged. */
9824 static int vmSwapOneObjectThreaded() { 
9825     return vmSwapOneObject(1); 
9828 /* Return true if it's safe to swap out objects at this moment. 
9829  * Basically we don't want to swap objects out while there is a BGSAVE 
9830  * or a BGREWRITEAOF running in background: the check is that both 
 * server.bgsavechildpid and server.bgrewritechildpid are -1. */ 
9831 static int vmCanSwapOut(void) { 
9832     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
9835 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
9837 static void freeIOJob(iojob 
*j
) { 
9838     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
9839         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
9840         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
9842          /* we fix the storage type, otherwise decrRefCount() will try to 
9843           * kill the I/O thread Job (that no longer exists). */ 
9844         if (j
->val
->storage 
== REDIS_VM_SWAPPING
) 
9845             j
->val
->storage 
= REDIS_VM_MEMORY
; 
9846         decrRefCount(j
->val
); 
9848     decrRefCount(j
->key
); 
9852 /* Every time a thread finished a Job, it writes a byte into the write side 
9853  * of an unix pipe in order to "awake" the main thread, and this function 
9855 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
9859     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
9861     REDIS_NOTUSED(mask
); 
9862     REDIS_NOTUSED(privdata
); 
9864     /* For every byte we read in the read side of the pipe, there is one 
9865      * I/O job completed to process. */ 
9866     while((retval 
= read(fd
,buf
,1)) == 1) { 
9869         struct dictEntry 
*de
; 
9871         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
9873         /* Get the processed element (the oldest one) */ 
9875         assert(listLength(server
.io_processed
) != 0); 
9876         if (toprocess 
== -1) { 
9877             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
9878             if (toprocess 
<= 0) toprocess 
= 1; 
9880         ln 
= listFirst(server
.io_processed
); 
9882         listDelNode(server
.io_processed
,ln
); 
9884         /* If this job is marked as canceled, just ignore it */ 
9889         /* Post process it in the main thread, as there are things we 
9890          * can do just here to avoid race conditions and/or invasive locks */ 
9891         redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
); 
9892         de 
= dictFind(j
->db
->dict
,j
->key
->ptr
); 
9893         redisAssert(de 
!= NULL
); 
9894         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
9896             vmpointer 
*vp 
= dictGetEntryVal(de
); 
9898             /* Key loaded, bring it at home */ 
9899             vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
9900             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
9901                 (unsigned char*) j
->key
->ptr
); 
9902             server
.vm_stats_swapped_objects
--; 
9903             server
.vm_stats_swapins
++; 
9904             dictGetEntryVal(de
) = j
->val
; 
9905             incrRefCount(j
->val
); 
9907             /* Handle clients waiting for this key to be loaded. */ 
9908             handleClientsBlockedOnSwappedKey(db
,j
->key
); 
9911         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
9912             /* Now we know the amount of pages required to swap this object. 
9913              * Let's find some space for it, and queue this task again 
9914              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
9915             if (!vmCanSwapOut() || 
9916                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
9918                 /* Ooops... no space or we can't swap as there is 
9919                  * a fork()ed Redis trying to save stuff on disk. */ 
9920                 j
->val
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
9923                 /* Note that we need to mark these pages as used now, 
9924                  * if the job will be canceled, we'll mark them as freed 
9926                 vmMarkPagesUsed(j
->page
,j
->pages
); 
9927                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
9932         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
9935             /* Key swapped. We can finally free some memory. */ 
9936             if (j
->val
->storage 
!= REDIS_VM_SWAPPING
) { 
9937                 vmpointer 
*vp 
= (vmpointer
*) j
->id
; 
9938                 printf("storage: %d\n",vp
->storage
); 
9939                 printf("key->name: %s\n",(char*)j
->key
->ptr
); 
9940                 printf("val: %p\n",(void*)j
->val
); 
9941                 printf("val->type: %d\n",j
->val
->type
); 
9942                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
9944             redisAssert(j
->val
->storage 
== REDIS_VM_SWAPPING
); 
9945             vp 
= createVmPointer(j
->val
->type
); 
9947             vp
->usedpages 
= j
->pages
; 
9948             dictGetEntryVal(de
) = vp
; 
9949             /* Fix the storage otherwise decrRefCount will attempt to 
9950              * remove the associated I/O job */ 
9951             j
->val
->storage 
= REDIS_VM_MEMORY
; 
9952             decrRefCount(j
->val
); 
9953             redisLog(REDIS_DEBUG
, 
9954                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
9955                 (unsigned char*) j
->key
->ptr
, 
9956                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
9957             server
.vm_stats_swapped_objects
++; 
9958             server
.vm_stats_swapouts
++; 
9960             /* Put a few more swap requests in queue if we are still 
9962             if (trytoswap 
&& vmCanSwapOut() && 
9963                 zmalloc_used_memory() > server
.vm_max_memory
) 
9968                     more 
= listLength(server
.io_newjobs
) < 
9969                             (unsigned) server
.vm_max_threads
; 
9971                     /* Don't waste CPU time if swappable objects are rare. */ 
9972                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
9980         if (processed 
== toprocess
) return; 
9982     if (retval 
< 0 && errno 
!= EAGAIN
) { 
9983         redisLog(REDIS_WARNING
, 
9984             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
9989 static void lockThreadedIO(void) { 
9990     pthread_mutex_lock(&server
.io_mutex
); 
9993 static void unlockThreadedIO(void) { 
9994     pthread_mutex_unlock(&server
.io_mutex
); 
9997 /* Remove the specified object from the threaded I/O queue if still not 
9998  * processed, otherwise make sure to flag it as canceled. */ 
9999 static void vmCancelThreadedIOJob(robj 
*o
) { 
10001         server
.io_newjobs
,      /* 0 */ 
10002         server
.io_processing
,   /* 1 */ 
10003         server
.io_processed     
/* 2 */ 
10007     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
10010     /* Search for a matching object in one of the queues */ 
10011     for (i 
= 0; i 
< 3; i
++) { 
10015         listRewind(lists
[i
],&li
); 
10016         while ((ln 
= listNext(&li
)) != NULL
) { 
10017             iojob 
*job 
= ln
->value
; 
10019             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
10020             if (job
->id 
== o
) { 
10021                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n", 
10022                     (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
); 
10023                 /* Mark the pages as free since the swap didn't happen 
10024                  * or happened but is now discarded. */ 
10025                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
10026                     vmMarkPagesFree(job
->page
,job
->pages
); 
10027                 /* Cancel the job. It depends on the list the job is 
10030                 case 0: /* io_newjobs */ 
10031                     /* If the job was not yet processed the best thing to do 
10032                      * is to remove it from the queue altogether */ 
10034                     listDelNode(lists
[i
],ln
); 
10036                 case 1: /* io_processing */ 
10037                     /* Oh Shi- the thread is messing with the Job: 
10039                      * Probably it's accessing the object if this is a 
10040                      * PREPARE_SWAP or DO_SWAP job. 
10041                      * If it's a LOAD job it may be reading from disk and 
10042                      * if we don't wait for the job to terminate before 
10043                      * canceling it, maybe in a few microseconds data can be 
10044                      * corrupted in these pages. So the short story is: 
10046                      * Better to wait for the job to move into the 
10047                      * next queue (processed)... */ 
10049                     /* We try again and again until the job is completed. */ 
10050                     unlockThreadedIO(); 
10051                     /* But let's wait some time for the I/O thread 
10052                      * to finish with this job. After all this condition 
10053                      * should be very rare. */ 
10056                 case 2: /* io_processed */ 
10057                     /* The job was already processed, that's easy... 
10058                      * just mark it as canceled so that we'll ignore it 
10059                      * when processing completed jobs. */ 
10063                 /* Finally we have to adjust the storage type of the object 
10064                  * in order to "UNDO" the operation. */ 
10065                 if (o
->storage 
== REDIS_VM_LOADING
) 
10066                     o
->storage 
= REDIS_VM_SWAPPED
; 
10067                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
10068                     o
->storage 
= REDIS_VM_MEMORY
; 
10069                 unlockThreadedIO(); 
10070                 redisLog(REDIS_DEBUG
,"*** DONE"); 
10075     unlockThreadedIO(); 
10076     printf("Not found: %p\n", (void*)o
); 
10077     redisAssert(1 != 1); /* We should never reach this */ 
10080 static void *IOThreadEntryPoint(void *arg
) { 
10083     REDIS_NOTUSED(arg
); 
10085     pthread_detach(pthread_self()); 
10087         /* Get a new job to process */ 
10089         if (listLength(server
.io_newjobs
) == 0) { 
10090             /* No new jobs in queue, exit. */ 
10091             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
10092                 (long) pthread_self()); 
10093             server
.io_active_threads
--; 
10094             unlockThreadedIO(); 
10097         ln 
= listFirst(server
.io_newjobs
); 
10099         listDelNode(server
.io_newjobs
,ln
); 
10100         /* Add the job in the processing queue */ 
10101         j
->thread 
= pthread_self(); 
10102         listAddNodeTail(server
.io_processing
,j
); 
10103         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
10104         unlockThreadedIO(); 
10105         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
10106             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
10108         /* Process the Job */ 
10109         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
10110             vmpointer 
*vp 
= (vmpointer
*)j
->id
; 
10111             j
->val 
= vmReadObjectFromSwap(j
->page
,vp
->vtype
); 
10112         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
10113             FILE *fp 
= fopen("/dev/null","w+"); 
10114             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
10116         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
10117             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
10121         /* Done: insert the job into the processed queue */ 
10122         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
10123             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
10125         listDelNode(server
.io_processing
,ln
); 
10126         listAddNodeTail(server
.io_processed
,j
); 
10127         unlockThreadedIO(); 
10129         /* Signal the main thread there is new stuff to process */ 
10130         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
10132     return NULL
; /* never reached */ 
10135 static void spawnIOThread(void) { 
10137     sigset_t mask
, omask
; 
10140     sigemptyset(&mask
); 
10141     sigaddset(&mask
,SIGCHLD
); 
10142     sigaddset(&mask
,SIGHUP
); 
10143     sigaddset(&mask
,SIGPIPE
); 
10144     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
10145     while ((err 
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) { 
10146         redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s", 
10150     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
10151     server
.io_active_threads
++; 
10154 /* We need to wait for the last thread to exit before we are able to 
10155  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
10156 static void waitEmptyIOJobsQueue(void) { 
10158         int io_processed_len
; 
10161         if (listLength(server
.io_newjobs
) == 0 && 
10162             listLength(server
.io_processing
) == 0 && 
10163             server
.io_active_threads 
== 0) 
10165             unlockThreadedIO(); 
10168         /* While waiting for empty jobs queue condition we post-process some 
10169          * finished jobs, as I/O threads may be hanging trying to write against 
10170          * the io_ready_pipe_write FD but there are so many pending jobs that 
10171          * it's blocking. */ 
10172         io_processed_len 
= listLength(server
.io_processed
); 
10173         unlockThreadedIO(); 
10174         if (io_processed_len
) { 
10175             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
10176             usleep(1000); /* 1 millisecond */ 
10178             usleep(10000); /* 10 milliseconds */ 
10183 static void vmReopenSwapFile(void) { 
10184     /* Note: we don't close the old one as we are in the child process 
10185      * and don't want to mess at all with the original file object. */ 
10186     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
10187     if (server
.vm_fp 
== NULL
) { 
10188         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
10189             server
.vm_swap_file
); 
10192     server
.vm_fd 
= fileno(server
.vm_fp
); 
10195 /* This function must be called with threaded IO locked */ 
10196 static void queueIOJob(iojob 
*j
) { 
10197     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
10198         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
10199     listAddNodeTail(server
.io_newjobs
,j
); 
10200     if (server
.io_active_threads 
< server
.vm_max_threads
) 
10204 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
10207     j 
= zmalloc(sizeof(*j
)); 
10208     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
10212     j
->id 
= j
->val 
= val
; 
10215     j
->thread 
= (pthread_t
) -1; 
10216     val
->storage 
= REDIS_VM_SWAPPING
; 
10220     unlockThreadedIO(); 
10224 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
10226 /* This function makes the client 'c' wait for the key 'key' to be loaded. 
10227  * If there is not already a job loading the key, it is created. 
10228  * The key is added to the io_keys list in the client structure, and also 
10229  * in the hash table mapping swapped keys to waiting clients, that is, 
10230  * server.io_waited_keys. */ 
10231 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
10232     struct dictEntry 
*de
; 
10236     /* If the key does not exist or is already in RAM we don't need to 
10237      * block the client at all. */ 
10238     de 
= dictFind(c
->db
->dict
,key
->ptr
); 
10239     if (de 
== NULL
) return 0; 
10240     o 
= dictGetEntryVal(de
); 
10241     if (o
->storage 
== REDIS_VM_MEMORY
) { 
10243     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
10244         /* We were swapping the key, undo it! */ 
10245         vmCancelThreadedIOJob(o
); 
10249     /* OK: the key is either swapped, or being loaded just now. */ 
10251     /* Add the key to the list of keys this client is waiting for. 
10252      * This maps clients to keys they are waiting for. */ 
10253     listAddNodeTail(c
->io_keys
,key
); 
10256     /* Add the client to the swapped keys => clients waiting map. */ 
10257     de 
= dictFind(c
->db
->io_keys
,key
); 
10261         /* For every key we take a list of clients blocked for it */ 
10263         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
10265         assert(retval 
== DICT_OK
); 
10267         l 
= dictGetEntryVal(de
); 
10269     listAddNodeTail(l
,c
); 
10271     /* Are we already loading the key from disk? If not create a job */ 
10272     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
10274         vmpointer 
*vp 
= (vmpointer
*)o
; 
10276         o
->storage 
= REDIS_VM_LOADING
; 
10277         j 
= zmalloc(sizeof(*j
)); 
10278         j
->type 
= REDIS_IOJOB_LOAD
; 
10283         j
->page 
= vp
->page
; 
10286         j
->thread 
= (pthread_t
) -1; 
10289         unlockThreadedIO(); 
10294 /* Preload keys for any command with first, last and step values for 
10295  * the command keys prototype, as defined in the command table. */ 
10296 static void waitForMultipleSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10298     if (cmd
->vm_firstkey 
== 0) return; 
10299     last 
= cmd
->vm_lastkey
; 
10300     if (last 
< 0) last 
= argc
+last
; 
10301     for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) { 
10302         redisAssert(j 
< argc
); 
10303         waitForSwappedKey(c
,argv
[j
]); 
10307 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands. 
10308  * Note that the number of keys to preload is user-defined, so we need to 
10309  * apply a sanity check against argc. */ 
10310 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10312     REDIS_NOTUSED(cmd
); 
10314     num 
= atoi(argv
[2]->ptr
); 
10315     if (num 
> (argc
-3)) return; 
10316     for (i 
= 0; i 
< num
; i
++) { 
10317         waitForSwappedKey(c
,argv
[3+i
]); 
10321 /* Preload keys needed to execute the entire MULTI/EXEC block. 
10323  * This function is called by blockClientOnSwappedKeys when EXEC is issued, 
10324  * and will block the client when any command requires a swapped out value. */ 
10325 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10327     struct redisCommand 
*mcmd
; 
10329     REDIS_NOTUSED(cmd
); 
10330     REDIS_NOTUSED(argc
); 
10331     REDIS_NOTUSED(argv
); 
10333     if (!(c
->flags 
& REDIS_MULTI
)) return; 
10334     for (i 
= 0; i 
< c
->mstate
.count
; i
++) { 
10335         mcmd 
= c
->mstate
.commands
[i
].cmd
; 
10336         margc 
= c
->mstate
.commands
[i
].argc
; 
10337         margv 
= c
->mstate
.commands
[i
].argv
; 
10339         if (mcmd
->vm_preload_proc 
!= NULL
) { 
10340             mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
); 
10342             waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
); 
10347 /* Is this client attempting to run a command against swapped keys? 
10348  * If so, block it ASAP, load the keys in background, then resume it. 
10350  * The important idea about this function is that it can fail! If keys are 
10351  * still swapped when the client is resumed, the key lookups will 
10352  * just block loading keys from disk. In practical terms this should only 
10353  * happen with SORT BY command or if there is a bug in this function. 
10355  * Return 1 if the client is marked as blocked, 0 if the client can 
10356  * continue as the keys it is going to access appear to be in memory. */ 
10357 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
) { 
10358     if (cmd
->vm_preload_proc 
!= NULL
) { 
10359         cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
); 
10361         waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
); 
10364     /* If the client was blocked for at least one key, mark it as blocked. */ 
10365     if (listLength(c
->io_keys
)) { 
10366         c
->flags 
|= REDIS_IO_WAIT
; 
10367         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
10368         server
.vm_blocked_clients
++; 
10375 /* Remove the 'key' from the list of blocked keys for a given client. 
10377  * The function returns 1 when there are no longer blocking keys after 
10378  * the current one was removed (and the client can be unblocked). */ 
10379 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
10383     struct dictEntry 
*de
; 
10385     /* Remove the key from the list of keys this client is waiting for. */ 
10386     listRewind(c
->io_keys
,&li
); 
10387     while ((ln 
= listNext(&li
)) != NULL
) { 
10388         if (equalStringObjects(ln
->value
,key
)) { 
10389             listDelNode(c
->io_keys
,ln
); 
10393     assert(ln 
!= NULL
); 
10395     /* Remove the client from the key => waiting clients map. */ 
10396     de 
= dictFind(c
->db
->io_keys
,key
); 
10397     assert(de 
!= NULL
); 
10398     l 
= dictGetEntryVal(de
); 
10399     ln 
= listSearchKey(l
,c
); 
10400     assert(ln 
!= NULL
); 
10402     if (listLength(l
) == 0) 
10403         dictDelete(c
->db
->io_keys
,key
); 
10405     return listLength(c
->io_keys
) == 0; 
10408 /* Every time we know a key was loaded back in memory, we handle clients 
10409  * waiting for this key if any. */ 
10410 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
10411     struct dictEntry 
*de
; 
10416     de 
= dictFind(db
->io_keys
,key
); 
10419     l 
= dictGetEntryVal(de
); 
10420     len 
= listLength(l
); 
10421     /* Note: we can't use something like while(listLength(l)) as the list 
10422      * can be freed by the calling function when we remove the last element. */ 
10425         redisClient 
*c 
= ln
->value
; 
10427         if (dontWaitForSwappedKey(c
,key
)) { 
10428             /* Put the client in the list of clients ready to go as we 
10429              * loaded all the keys about it. */ 
10430             listAddNodeTail(server
.io_ready_clients
,c
); 
10435 /* =========================== Remote Configuration ========================= */ 
10437 static void configSetCommand(redisClient 
*c
) { 
10438     robj 
*o 
= getDecodedObject(c
->argv
[3]); 
10441     if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) { 
10442         zfree(server
.dbfilename
); 
10443         server
.dbfilename 
= zstrdup(o
->ptr
); 
10444     } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) { 
10445         zfree(server
.requirepass
); 
10446         server
.requirepass 
= zstrdup(o
->ptr
); 
10447     } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) { 
10448         zfree(server
.masterauth
); 
10449         server
.masterauth 
= zstrdup(o
->ptr
); 
10450     } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) { 
10451         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
10452             ll 
< 0) goto badfmt
; 
10453         server
.maxmemory 
= ll
; 
10454     } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) { 
10455         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
10456             ll 
< 0 || ll 
> LONG_MAX
) goto badfmt
; 
10457         server
.maxidletime 
= ll
; 
10458     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) { 
10459         if (!strcasecmp(o
->ptr
,"no")) { 
10460             server
.appendfsync 
= APPENDFSYNC_NO
; 
10461         } else if (!strcasecmp(o
->ptr
,"everysec")) { 
10462             server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
10463         } else if (!strcasecmp(o
->ptr
,"always")) { 
10464             server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
10468     } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) { 
10469         int yn 
= yesnotoi(o
->ptr
); 
10471         if (yn 
== -1) goto badfmt
; 
10472         server
.no_appendfsync_on_rewrite 
= yn
; 
10473     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) { 
10474         int old 
= server
.appendonly
; 
10475         int new = yesnotoi(o
->ptr
); 
10477         if (new == -1) goto badfmt
; 
10482                 if (startAppendOnly() == REDIS_ERR
) { 
10483                     addReplySds(c
,sdscatprintf(sdsempty(), 
10484                         "-ERR Unable to turn on AOF. Check server logs.\r\n")); 
10490     } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) { 
10492         sds 
*v 
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
); 
10494         /* Perform sanity check before setting the new config: 
10495          * - Even number of args 
10496          * - Seconds >= 1, changes >= 0 */ 
10498             sdsfreesplitres(v
,vlen
); 
10501         for (j 
= 0; j 
< vlen
; j
++) { 
10505             val 
= strtoll(v
[j
], &eptr
, 10); 
10506             if (eptr
[0] != '\0' || 
10507                 ((j 
& 1) == 0 && val 
< 1) || 
10508                 ((j 
& 1) == 1 && val 
< 0)) { 
10509                 sdsfreesplitres(v
,vlen
); 
10513         /* Finally set the new config */ 
10514         resetServerSaveParams(); 
10515         for (j 
= 0; j 
< vlen
; j 
+= 2) { 
10519             seconds 
= strtoll(v
[j
],NULL
,10); 
10520             changes 
= strtoll(v
[j
+1],NULL
,10); 
10521             appendServerSaveParams(seconds
, changes
); 
10523         sdsfreesplitres(v
,vlen
); 
10525         addReplySds(c
,sdscatprintf(sdsempty(), 
10526             "-ERR not supported CONFIG parameter %s\r\n", 
10527             (char*)c
->argv
[2]->ptr
)); 
10532     addReply(c
,shared
.ok
); 
10535 badfmt
: /* Bad format errors */ 
10536     addReplySds(c
,sdscatprintf(sdsempty(), 
10537         "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n", 
10539             (char*)c
->argv
[2]->ptr
)); 
10543 static void configGetCommand(redisClient 
*c
) { 
10544     robj 
*o 
= getDecodedObject(c
->argv
[2]); 
10545     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
10546     char *pattern 
= o
->ptr
; 
10549     addReply(c
,lenobj
); 
10550     decrRefCount(lenobj
); 
10552     if (stringmatch(pattern
,"dbfilename",0)) { 
10553         addReplyBulkCString(c
,"dbfilename"); 
10554         addReplyBulkCString(c
,server
.dbfilename
); 
10557     if (stringmatch(pattern
,"requirepass",0)) { 
10558         addReplyBulkCString(c
,"requirepass"); 
10559         addReplyBulkCString(c
,server
.requirepass
); 
10562     if (stringmatch(pattern
,"masterauth",0)) { 
10563         addReplyBulkCString(c
,"masterauth"); 
10564         addReplyBulkCString(c
,server
.masterauth
); 
10567     if (stringmatch(pattern
,"maxmemory",0)) { 
10570         ll2string(buf
,128,server
.maxmemory
); 
10571         addReplyBulkCString(c
,"maxmemory"); 
10572         addReplyBulkCString(c
,buf
); 
10575     if (stringmatch(pattern
,"timeout",0)) { 
10578         ll2string(buf
,128,server
.maxidletime
); 
10579         addReplyBulkCString(c
,"timeout"); 
10580         addReplyBulkCString(c
,buf
); 
10583     if (stringmatch(pattern
,"appendonly",0)) { 
10584         addReplyBulkCString(c
,"appendonly"); 
10585         addReplyBulkCString(c
,server
.appendonly 
? "yes" : "no"); 
10588     if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) { 
10589         addReplyBulkCString(c
,"no-appendfsync-on-rewrite"); 
10590         addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite 
? "yes" : "no"); 
10593     if (stringmatch(pattern
,"appendfsync",0)) { 
10596         switch(server
.appendfsync
) { 
10597         case APPENDFSYNC_NO
: policy 
= "no"; break; 
10598         case APPENDFSYNC_EVERYSEC
: policy 
= "everysec"; break; 
10599         case APPENDFSYNC_ALWAYS
: policy 
= "always"; break; 
10600         default: policy 
= "unknown"; break; /* too harmless to panic */ 
10602         addReplyBulkCString(c
,"appendfsync"); 
10603         addReplyBulkCString(c
,policy
); 
10606     if (stringmatch(pattern
,"save",0)) { 
10607         sds buf 
= sdsempty(); 
10610         for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
10611             buf 
= sdscatprintf(buf
,"%ld %d", 
10612                     server
.saveparams
[j
].seconds
, 
10613                     server
.saveparams
[j
].changes
); 
10614             if (j 
!= server
.saveparamslen
-1) 
10615                 buf 
= sdscatlen(buf
," ",1); 
10617         addReplyBulkCString(c
,"save"); 
10618         addReplyBulkCString(c
,buf
); 
10623     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2); 
10626 static void configCommand(redisClient 
*c
) { 
10627     if (!strcasecmp(c
->argv
[1]->ptr
,"set")) { 
10628         if (c
->argc 
!= 4) goto badarity
; 
10629         configSetCommand(c
); 
10630     } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) { 
10631         if (c
->argc 
!= 3) goto badarity
; 
10632         configGetCommand(c
); 
10633     } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) { 
10634         if (c
->argc 
!= 2) goto badarity
; 
10635         server
.stat_numcommands 
= 0; 
10636         server
.stat_numconnections 
= 0; 
10637         server
.stat_expiredkeys 
= 0; 
10638         server
.stat_starttime 
= time(NULL
); 
10639         addReply(c
,shared
.ok
); 
10641         addReplySds(c
,sdscatprintf(sdsempty(), 
10642             "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n")); 
10647     addReplySds(c
,sdscatprintf(sdsempty(), 
10648         "-ERR Wrong number of arguments for CONFIG %s\r\n", 
10649         (char*) c
->argv
[1]->ptr
)); 
10652 /* =========================== Pubsub implementation ======================== */ 
10654 static void freePubsubPattern(void *p
) { 
10655     pubsubPattern 
*pat 
= p
; 
10657     decrRefCount(pat
->pattern
); 
10661 static int listMatchPubsubPattern(void *a
, void *b
) { 
10662     pubsubPattern 
*pa 
= a
, *pb 
= b
; 
10664     return (pa
->client 
== pb
->client
) && 
10665            (equalStringObjects(pa
->pattern
,pb
->pattern
)); 
10668 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or 
10669  * 0 if the client was already subscribed to that channel. */ 
10670 static int pubsubSubscribeChannel(redisClient 
*c
, robj 
*channel
) { 
10671     struct dictEntry 
*de
; 
10672     list 
*clients 
= NULL
; 
10675     /* Add the channel to the client -> channels hash table */ 
10676     if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) { 
10678         incrRefCount(channel
); 
10679         /* Add the client to the channel -> list of clients hash table */ 
10680         de 
= dictFind(server
.pubsub_channels
,channel
); 
10682             clients 
= listCreate(); 
10683             dictAdd(server
.pubsub_channels
,channel
,clients
); 
10684             incrRefCount(channel
); 
10686             clients 
= dictGetEntryVal(de
); 
10688         listAddNodeTail(clients
,c
); 
10690     /* Notify the client */ 
10691     addReply(c
,shared
.mbulk3
); 
10692     addReply(c
,shared
.subscribebulk
); 
10693     addReplyBulk(c
,channel
); 
10694     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10698 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
10699  * 0 if the client was not subscribed to the specified channel. */ 
10700 static int pubsubUnsubscribeChannel(redisClient 
*c
, robj 
*channel
, int notify
) { 
10701     struct dictEntry 
*de
; 
10706     /* Remove the channel from the client -> channels hash table */ 
10707     incrRefCount(channel
); /* channel may be just a pointer to the same object 
10708                             we have in the hash tables. Protect it... */ 
10709     if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) { 
10711         /* Remove the client from the channel -> clients list hash table */ 
10712         de 
= dictFind(server
.pubsub_channels
,channel
); 
10713         assert(de 
!= NULL
); 
10714         clients 
= dictGetEntryVal(de
); 
10715         ln 
= listSearchKey(clients
,c
); 
10716         assert(ln 
!= NULL
); 
10717         listDelNode(clients
,ln
); 
10718         if (listLength(clients
) == 0) { 
10719             /* Free the list and the associated hash entry entirely if this 
10720              * was the last client, so that it is not possible to abuse 
10721              * Redis PUBSUB by creating millions of channels. */ 
10722             dictDelete(server
.pubsub_channels
,channel
); 
10725     /* Notify the client */ 
10727         addReply(c
,shared
.mbulk3
); 
10728         addReply(c
,shared
.unsubscribebulk
); 
10729         addReplyBulk(c
,channel
); 
10730         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10731                        listLength(c
->pubsub_patterns
)); 
10734     decrRefCount(channel
); /* it is finally safe to release it */ 
10738 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */ 
10739 static int pubsubSubscribePattern(redisClient 
*c
, robj 
*pattern
) { 
10742     if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) { 
10744         pubsubPattern 
*pat
; 
10745         listAddNodeTail(c
->pubsub_patterns
,pattern
); 
10746         incrRefCount(pattern
); 
10747         pat 
= zmalloc(sizeof(*pat
)); 
10748         pat
->pattern 
= getDecodedObject(pattern
); 
10750         listAddNodeTail(server
.pubsub_patterns
,pat
); 
10752     /* Notify the client */ 
10753     addReply(c
,shared
.mbulk3
); 
10754     addReply(c
,shared
.psubscribebulk
); 
10755     addReplyBulk(c
,pattern
); 
10756     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10760 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or 
10761  * 0 if the client was not subscribed to the specified pattern. */ 
10762 static int pubsubUnsubscribePattern(redisClient 
*c
, robj 
*pattern
, int notify
) { 
10767     incrRefCount(pattern
); /* Protect the object. May be the same we remove */ 
10768     if ((ln 
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) { 
10770         listDelNode(c
->pubsub_patterns
,ln
); 
10772         pat
.pattern 
= pattern
; 
10773         ln 
= listSearchKey(server
.pubsub_patterns
,&pat
); 
10774         listDelNode(server
.pubsub_patterns
,ln
); 
10776     /* Notify the client */ 
10778         addReply(c
,shared
.mbulk3
); 
10779         addReply(c
,shared
.punsubscribebulk
); 
10780         addReplyBulk(c
,pattern
); 
10781         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10782                        listLength(c
->pubsub_patterns
)); 
10784     decrRefCount(pattern
); 
10788 /* Unsubscribe from all the channels. Return the number of channels the 
10789  * client was subscribed to. */ 
10790 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
) { 
10791     dictIterator 
*di 
= dictGetIterator(c
->pubsub_channels
); 
10795     while((de 
= dictNext(di
)) != NULL
) { 
10796         robj 
*channel 
= dictGetEntryKey(de
); 
10798         count 
+= pubsubUnsubscribeChannel(c
,channel
,notify
); 
10800     dictReleaseIterator(di
); 
10804 /* Unsubscribe from all the patterns. Return the number of patterns the 
10805  * client was subscribed to. */ 
10806 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
) { 
10811     listRewind(c
->pubsub_patterns
,&li
); 
10812     while ((ln 
= listNext(&li
)) != NULL
) { 
10813         robj 
*pattern 
= ln
->value
; 
10815         count 
+= pubsubUnsubscribePattern(c
,pattern
,notify
); 
10820 /* Publish a message */ 
10821 static int pubsubPublishMessage(robj 
*channel
, robj 
*message
) { 
10823     struct dictEntry 
*de
; 
10827     /* Send to clients listening for that channel */ 
10828     de 
= dictFind(server
.pubsub_channels
,channel
); 
10830         list 
*list 
= dictGetEntryVal(de
); 
10834         listRewind(list
,&li
); 
10835         while ((ln 
= listNext(&li
)) != NULL
) { 
10836             redisClient 
*c 
= ln
->value
; 
10838             addReply(c
,shared
.mbulk3
); 
10839             addReply(c
,shared
.messagebulk
); 
10840             addReplyBulk(c
,channel
); 
10841             addReplyBulk(c
,message
); 
10845     /* Send to clients listening to matching channels */ 
10846     if (listLength(server
.pubsub_patterns
)) { 
10847         listRewind(server
.pubsub_patterns
,&li
); 
10848         channel 
= getDecodedObject(channel
); 
10849         while ((ln 
= listNext(&li
)) != NULL
) { 
10850             pubsubPattern 
*pat 
= ln
->value
; 
10852             if (stringmatchlen((char*)pat
->pattern
->ptr
, 
10853                                 sdslen(pat
->pattern
->ptr
), 
10854                                 (char*)channel
->ptr
, 
10855                                 sdslen(channel
->ptr
),0)) { 
10856                 addReply(pat
->client
,shared
.mbulk4
); 
10857                 addReply(pat
->client
,shared
.pmessagebulk
); 
10858                 addReplyBulk(pat
->client
,pat
->pattern
); 
10859                 addReplyBulk(pat
->client
,channel
); 
10860                 addReplyBulk(pat
->client
,message
); 
10864         decrRefCount(channel
); 
10869 static void subscribeCommand(redisClient 
*c
) { 
10872     for (j 
= 1; j 
< c
->argc
; j
++) 
10873         pubsubSubscribeChannel(c
,c
->argv
[j
]); 
10876 static void unsubscribeCommand(redisClient 
*c
) { 
10877     if (c
->argc 
== 1) { 
10878         pubsubUnsubscribeAllChannels(c
,1); 
10883         for (j 
= 1; j 
< c
->argc
; j
++) 
10884             pubsubUnsubscribeChannel(c
,c
->argv
[j
],1); 
10888 static void psubscribeCommand(redisClient 
*c
) { 
10891     for (j 
= 1; j 
< c
->argc
; j
++) 
10892         pubsubSubscribePattern(c
,c
->argv
[j
]); 
10895 static void punsubscribeCommand(redisClient 
*c
) { 
10896     if (c
->argc 
== 1) { 
10897         pubsubUnsubscribeAllPatterns(c
,1); 
10902         for (j 
= 1; j 
< c
->argc
; j
++) 
10903             pubsubUnsubscribePattern(c
,c
->argv
[j
],1); 
10907 static void publishCommand(redisClient 
*c
) { 
10908     int receivers 
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]); 
10909     addReplyLongLong(c
,receivers
); 
10912 /* ===================== WATCH (CAS alike for MULTI/EXEC) =================== 
10914  * The implementation uses a per-DB hash table mapping keys to list of clients 
10915  * WATCHing those keys, so that given a key that is going to be modified 
10916  * we can mark all the associated clients as dirty. 
10918  * Also every client contains a list of WATCHed keys so that's possible to 
10919  * un-watch such keys when the client is freed or when UNWATCH is called. */ 
10921 /* In the client->watched_keys list we need to use watchedKey structures 
10922  * as in order to identify a key in Redis we need both the key name and the 
10924 typedef struct watchedKey 
{ 
10929 /* Watch for the specified key */ 
10930 static void watchForKey(redisClient 
*c
, robj 
*key
) { 
10931     list 
*clients 
= NULL
; 
10936     /* Check if we are already watching for this key */ 
10937     listRewind(c
->watched_keys
,&li
); 
10938     while((ln 
= listNext(&li
))) { 
10939         wk 
= listNodeValue(ln
); 
10940         if (wk
->db 
== c
->db 
&& equalStringObjects(key
,wk
->key
)) 
10941             return; /* Key already watched */ 
10943     /* This key is not already watched in this DB. Let's add it */ 
10944     clients 
= dictFetchValue(c
->db
->watched_keys
,key
); 
10946         clients 
= listCreate(); 
10947         dictAdd(c
->db
->watched_keys
,key
,clients
); 
10950     listAddNodeTail(clients
,c
); 
10951     /* Add the new key to the list of keys watched by this client */ 
10952     wk 
= zmalloc(sizeof(*wk
)); 
10956     listAddNodeTail(c
->watched_keys
,wk
); 
10959 /* Unwatch all the keys watched by this client. To clean the EXEC dirty 
10960  * flag is up to the caller. */ 
10961 static void unwatchAllKeys(redisClient 
*c
) { 
10965     if (listLength(c
->watched_keys
) == 0) return; 
10966     listRewind(c
->watched_keys
,&li
); 
10967     while((ln 
= listNext(&li
))) { 
10971         /* Lookup the watched key -> clients list and remove the client 
10973         wk 
= listNodeValue(ln
); 
10974         clients 
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
); 
10975         assert(clients 
!= NULL
); 
10976         listDelNode(clients
,listSearchKey(clients
,c
)); 
10977         /* Kill the entry at all if this was the only client */ 
10978         if (listLength(clients
) == 0) 
10979             dictDelete(wk
->db
->watched_keys
, wk
->key
); 
10980         /* Remove this watched key from the client->watched list */ 
10981         listDelNode(c
->watched_keys
,ln
); 
10982         decrRefCount(wk
->key
); 
10987 /* "Touch" a key, so that if this key is being WATCHed by some client the 
10988  * next EXEC will fail. */ 
10989 static void touchWatchedKey(redisDb 
*db
, robj 
*key
) { 
10994     if (dictSize(db
->watched_keys
) == 0) return; 
10995     clients 
= dictFetchValue(db
->watched_keys
, key
); 
10996     if (!clients
) return; 
10998     /* Mark all the clients watching this key as REDIS_DIRTY_CAS */ 
10999     /* Check if we are already watching for this key */ 
11000     listRewind(clients
,&li
); 
11001     while((ln 
= listNext(&li
))) { 
11002         redisClient 
*c 
= listNodeValue(ln
); 
11004         c
->flags 
|= REDIS_DIRTY_CAS
; 
11008 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the 
11009  * flush but will be deleted as effect of the flushing operation should 
11010  * be touched. "dbid" is the DB that's getting the flush. -1 if it is 
11011  * a FLUSHALL operation (all the DBs flushed). */ 
11012 static void touchWatchedKeysOnFlush(int dbid
) { 
11016     /* For every client, check all the watched keys */ 
11017     listRewind(server
.clients
,&li1
); 
11018     while((ln 
= listNext(&li1
))) { 
11019         redisClient 
*c 
= listNodeValue(ln
); 
11020         listRewind(c
->watched_keys
,&li2
); 
11021         while((ln 
= listNext(&li2
))) { 
11022             watchedKey 
*wk 
= listNodeValue(ln
); 
11024             /* For every watched key matching the specified DB, if the 
11025              * key exists, mark the client as dirty, as the key will be 
11027             if (dbid 
== -1 || wk
->db
->id 
== dbid
) { 
11028                 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
) 
11029                     c
->flags 
|= REDIS_DIRTY_CAS
; 
11035 static void watchCommand(redisClient 
*c
) { 
11038     if (c
->flags 
& REDIS_MULTI
) { 
11039         addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n")); 
11042     for (j 
= 1; j 
< c
->argc
; j
++) 
11043         watchForKey(c
,c
->argv
[j
]); 
11044     addReply(c
,shared
.ok
); 
11047 static void unwatchCommand(redisClient 
*c
) { 
11049     c
->flags 
&= (~REDIS_DIRTY_CAS
); 
11050     addReply(c
,shared
.ok
); 
11053 /* ================================= Debugging ============================== */ 
11055 /* Compute the sha1 of the string at 's', 'len' bytes long. 
11056  * The SHA1 is then xored against the string pointed to by digest. 
11057  * Since xor is commutative, this operation is used in order to 
11058  * "add" digests relative to unordered elements. 
11060  * So digest(a,b,c,d) will be the same as digest(b,a,c,d) */ 
11061 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
11063     unsigned char hash
[20], *s 
= ptr
; 
11067     SHA1Update(&ctx
,s
,len
); 
11068     SHA1Final(hash
,&ctx
); 
11070     for (j 
= 0; j 
< 20; j
++) 
11071         digest
[j
] ^= hash
[j
]; 
11074 static void xorObjectDigest(unsigned char *digest
, robj 
*o
) { 
11075     o 
= getDecodedObject(o
); 
11076     xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
11080 /* This function, instead of just computing the SHA1 and xoring it 
11081  * against digest, also performs the digest of "digest" itself and 
11082  * replaces the old value with the new one. 
11084  * So the final digest will be: 
11086  * digest = SHA1(digest xor SHA1(data)) 
11088  * This function is used every time we want to preserve the order so 
11089  * that digest(a,b,c,d) will be different than digest(b,c,d,a) 
11091  * Also note that mixdigest("foo") followed by mixdigest("bar") 
11092  * will lead to a different digest compared to "fo", "obar". 
11094 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
11098     xorDigest(digest
,s
,len
); 
11100     SHA1Update(&ctx
,digest
,20); 
11101     SHA1Final(digest
,&ctx
); 
11104 static void mixObjectDigest(unsigned char *digest
, robj 
*o
) { 
11105     o 
= getDecodedObject(o
); 
11106     mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
11110 /* Compute the dataset digest. Since keys, sets elements, hashes elements 
11111  * are not ordered, we use a trick: every aggregate digest is the xor 
11112  * of the digests of their elements. This way the order will not change 
11113  * the result. For list instead we use a feedback entering the output digest 
11114  * as input in order to ensure that a different ordered list will result in 
11115  * a different digest. */ 
/* Fill the 20-byte buffer 'final' with a digest of the whole dataset.
 *
 * 'final' is zeroed first. Every database whose dict is not empty is then
 * visited: 'aux' is mixed into 'final' (the DB id, going by the comment
 * below), and for each key a separate 20-byte 'digest' is built from the
 * key name, the value type and the value itself, then XORed into 'final'.
 * A key for which getExpire() does not return -1 also gets a fixed marker
 * string XORed into its digest. */
11116 static void computeDatasetDigest(unsigned char *final
) { 
11117     unsigned char digest
[20]; 
11119     dictIterator 
*di 
= NULL
; 
11124     memset(final
,0,20); /* Start with a clean result */ 
11126     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
11127         redisDb 
*db 
= server
.db
+j
; 
11129         if (dictSize(db
->dict
) == 0) continue; 
11130         di 
= dictGetIterator(db
->dict
); 
11132         /* hash the DB id, so the same dataset moved in a different 
11133          * DB will lead to a different digest */ 
11135         mixDigest(final
,&aux
,sizeof(aux
)); 
11137         /* Iterate this DB writing every entry */ 
11138         while((de 
= dictNext(di
)) != NULL
) { 
11143             memset(digest
,0,20); /* This key-val digest */ 
11144             key 
= dictGetEntryKey(de
); 
11145             keyobj 
= createStringObject(key
,sdslen(key
)); 
11147             mixDigest(digest
,key
,sdslen(key
)); 
11149             /* Make sure the key is loaded if VM is active */ 
/* NOTE(review): no NULL check is visible between this lookup and the
 * o->type read that follows - confirm lookupKeyRead() cannot return NULL
 * for a key that was just obtained from the iterator. */
11150             o 
= lookupKeyRead(db
,keyobj
); 
11152             aux 
= htonl(o
->type
); 
11153             mixDigest(digest
,&aux
,sizeof(aux
)); 
11154             expiretime 
= getExpire(db
,keyobj
); 
11156             /* Save the key and associated value */ 
11157             if (o
->type 
== REDIS_STRING
) { 
11158                 mixObjectDigest(digest
,o
); 
11159             } else if (o
->type 
== REDIS_LIST
) { 
/* Lists: elements are mixed in one after the other, so their order
 * affects the digest. */
11160                 listTypeIterator 
*li 
= listTypeInitIterator(o
,0,REDIS_TAIL
); 
11161                 listTypeEntry entry
; 
11162                 while(listTypeNext(li
,&entry
)) { 
11163                     robj 
*eleobj 
= listTypeGet(&entry
); 
11164                     mixObjectDigest(digest
,eleobj
); 
11165                     decrRefCount(eleobj
); 
11167                 listTypeReleaseIterator(li
); 
11168             } else if (o
->type 
== REDIS_SET
) { 
/* Sets: every member is XORed into the key digest. */
11169                 dict 
*set 
= o
->ptr
; 
/* NOTE(review): this 'di' is a new declaration inside the branch and
 * shadows the function-level 'di'; the release below frees this one. */
11170                 dictIterator 
*di 
= dictGetIterator(set
); 
11173                 while((de 
= dictNext(di
)) != NULL
) { 
11174                     robj 
*eleobj 
= dictGetEntryKey(de
); 
11176                     xorObjectDigest(digest
,eleobj
); 
11178                 dictReleaseIterator(di
); 
11179             } else if (o
->type 
== REDIS_ZSET
) { 
/* Sorted sets: each member gets its own digest, made of the member mixed
 * with its score printed by snprintf(), which is then XORed in. */
11181                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
11184                 while((de 
= dictNext(di
)) != NULL
) { 
11185                     robj 
*eleobj 
= dictGetEntryKey(de
); 
11186                     double *score 
= dictGetEntryVal(de
); 
11187                     unsigned char eledigest
[20]; 
11189                     snprintf(buf
,sizeof(buf
),"%.17g",*score
); 
11190                     memset(eledigest
,0,20); 
11191                     mixObjectDigest(eledigest
,eleobj
); 
11192                     mixDigest(eledigest
,buf
,strlen(buf
)); 
11193                     xorDigest(digest
,eledigest
,20); 
11195                 dictReleaseIterator(di
); 
11196             } else if (o
->type 
== REDIS_HASH
) { 
/* Hashes: each field/value pair gets its own digest (field mixed first,
 * then value), which is then XORed in. */
11197                 hashTypeIterator 
*hi
; 
11200                 hi 
= hashTypeInitIterator(o
); 
11201                 while (hashTypeNext(hi
) != REDIS_ERR
) { 
11202                     unsigned char eledigest
[20]; 
11204                     memset(eledigest
,0,20); 
11205                     obj 
= hashTypeCurrent(hi
,REDIS_HASH_KEY
); 
11206                     mixObjectDigest(eledigest
,obj
); 
11208                     obj 
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
); 
11209                     mixObjectDigest(eledigest
,obj
); 
11211                     xorDigest(digest
,eledigest
,20); 
11213                 hashTypeReleaseIterator(hi
); 
11215                 redisPanic("Unknown object type"); 
11217             /* If the key has an expire, add it to the mix */ 
11218             if (expiretime 
!= -1) xorDigest(digest
,"!!expire!!",10); 
11219             /* We can finally xor the key-val digest to the final digest */ 
11220             xorDigest(final
,digest
,20); 
11221             decrRefCount(keyobj
); 
11223         dictReleaseIterator(di
); 
/* Implementation of the DEBUG command.
 *
 * argv[1] is compared case-insensitively against the subcommands handled
 * below: segfault, reload, loadaof, object <key>, swapin <key>,
 * swapout <key>, populate <count> and digest. Anything else gets the
 * syntax error reply at the end.
 *
 * NOTE(review): the syntax error text only lists SEGFAULT, OBJECT, SWAPIN,
 * SWAPOUT and RELOAD, although LOADAOF, POPULATE and DIGEST are handled
 * too. */
11227 static void debugCommand(redisClient 
*c
) { 
11228     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
/* Deliberate write through an invalid pointer. */
11229         *((char*)-1) = 'x'; 
11230     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
/* Save the dataset to server.dbfilename, then load it back. */
11231         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
11232             addReply(c
,shared
.err
); 
11236         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
11237             addReply(c
,shared
.err
); 
11240         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
11241         addReply(c
,shared
.ok
); 
11242     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
11244         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
11245             addReply(c
,shared
.err
); 
11248         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
11249         addReply(c
,shared
.ok
); 
11250     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
/* Report details about the value stored at argv[2]. */
11251         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
); 
11255             addReply(c
,shared
.nokeyerr
); 
11258         val 
= dictGetEntryVal(de
); 
/* Value usable as an object: VM disabled, or storage is MEMORY/SWAPPING. */
11259         if (!server
.vm_enabled 
|| (val
->storage 
== REDIS_VM_MEMORY 
|| 
11260                                    val
->storage 
== REDIS_VM_SWAPPING
)) { 
/* Only index strencoding[] when the encoding is within its bounds. */
11264             if (val
->encoding 
< (sizeof(strencoding
)/sizeof(char*))) { 
11265                 strenc 
= strencoding
[val
->encoding
]; 
11267                 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
); 
11270             addReplySds(c
,sdscatprintf(sdsempty(), 
11271                 "+Value at:%p refcount:%d " 
11272                 "encoding:%s serializedlength:%lld\r\n", 
11273                 (void*)val
, val
->refcount
, 
11274                 strenc
, (long long) rdbSavedObjectLen(val
,NULL
))); 
/* Otherwise the entry is treated as a vmpointer and its page info is
 * reported. */
11276             vmpointer 
*vp 
= (vmpointer
*) val
; 
11277             addReplySds(c
,sdscatprintf(sdsempty(), 
11278                 "+Value swapped at: page %llu " 
11279                 "using %llu pages\r\n", 
11280                 (unsigned long long) vp
->page
, 
11281                 (unsigned long long) vp
->usedpages
)); 
11283     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc 
== 3) { 
/* The lookup result is ignored; the reply is always OK. */
11284         lookupKeyRead(c
->db
,c
->argv
[2]); 
11285         addReply(c
,shared
.ok
); 
11286     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
11287         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
); 
11291         if (!server
.vm_enabled
) { 
11292             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
11296             addReply(c
,shared
.nokeyerr
); 
11299         val 
= dictGetEntryVal(de
); 
/* Swap out only values that are in memory and have a refcount of 1. */
11301         if (val
->storage 
!= REDIS_VM_MEMORY
) { 
11302             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
11303         } else if (val
->refcount 
!= 1) { 
11304             addReplySds(c
,sdsnew("-ERR Object is shared\r\n")); 
11305         } else if ((vp 
= vmSwapObjectBlocking(val
)) != NULL
) { 
/* The dict entry now refers to the returned vmpointer. */
11306             dictGetEntryVal(de
) = vp
; 
11307             addReply(c
,shared
.ok
); 
11309             addReply(c
,shared
.err
); 
11311     } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc 
== 3) { 
/* Create "key:<j>" => "value:<j>" pairs for j in [0, keys). */
11316         if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
) 
11318         for (j 
= 0; j 
< keys
; j
++) { 
11319             snprintf(buf
,sizeof(buf
),"key:%lu",j
); 
11320             key 
= createStringObject(buf
,strlen(buf
)); 
11321             if (lookupKeyRead(c
->db
,key
) != NULL
) { 
11325             snprintf(buf
,sizeof(buf
),"value:%lu",j
); 
11326             val 
= createStringObject(buf
,strlen(buf
)); 
11327             dbAdd(c
->db
,key
,val
); 
11330         addReply(c
,shared
.ok
); 
11331     } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc 
== 2) { 
/* Reply with the dataset digest as 40 lowercase hex digits. */
11332         unsigned char digest
[20]; 
11333         sds d 
= sdsnew("+"); 
11336         computeDatasetDigest(digest
); 
11337         for (j 
= 0; j 
< 20; j
++) 
11338             d 
= sdscatprintf(d
, "%02x",digest
[j
]); 
11340         d 
= sdscatlen(d
,"\r\n",2); 
11343         addReplySds(c
,sdsnew( 
11344             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n")); 
11348 static void _redisAssert(char *estr
, char *file
, int line
) { 
11349     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
11350     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
); 
11351 #ifdef HAVE_BACKTRACE 
11352     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
11353     *((char*)-1) = 'x'; 
11357 static void _redisPanic(char *msg
, char *file
, int line
) { 
11358     redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue"); 
11359     redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
); 
11360 #ifdef HAVE_BACKTRACE 
11361     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
11362     *((char*)-1) = 'x'; 
11366 /* =================================== Main! ================================ */ 
/* Return the integer stored in /proc/sys/vm/overcommit_memory.
 *
 * Returns -1 when the file cannot be opened or no line can be read from
 * it. The stream is closed on every path once it has been opened. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];

    if (!fp) return -1;
    if (fgets(buf,64,fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    return atoi(buf);
}
11383 void linuxOvercommitMemoryWarning(void) { 
11384     if (linuxOvercommitMemoryValue() == 0) { 
11385         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
11388 #endif /* __linux__ */ 
11390 static void daemonize(void) { 
11394     if (fork() != 0) exit(0); /* parent exits */ 
11395     setsid(); /* create a new session */ 
11397     /* Every output goes to /dev/null. If Redis is daemonized but 
11398      * the 'logfile' is set to 'stdout' in the configuration file 
11399      * it will not log at all. */ 
11400     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
11401         dup2(fd
, STDIN_FILENO
); 
11402         dup2(fd
, STDOUT_FILENO
); 
11403         dup2(fd
, STDERR_FILENO
); 
11404         if (fd 
> STDERR_FILENO
) close(fd
); 
11406     /* Try to write the pid file */ 
11407     fp 
= fopen(server
.pidfile
,"w"); 
11409         fprintf(fp
,"%d\n",getpid()); 
11414 static void version() { 
11415     printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
, 
11416         REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0); 
/* Print the command line synopsis on stderr and terminate the process
 * with status 1. main() calls this for "--help" and when too many
 * arguments are given, and does not expect it to return. */
static void usage() {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr,"       ./redis-server - (read config from stdin)\n");
    exit(1);
}
/* Server entry point.
 *
 * Visible steps: set up the default configuration and sort the command
 * table; treat argv[1] either as -v/--version/--help or as the path of the
 * configuration file to load; daemonize if configured; log the startup
 * banner; load the dataset from the append only file when
 * server.appendonly is set, otherwise from server.dbfilename; register
 * beforeSleep with the event loop and finally delete the event loop. */
11426 int main(int argc
, char **argv
) { 
11429     initServerConfig(); 
11430     sortCommandTable(); 
/* version() is reached for "-v" or "--version", usage() for "--help". */
11432         if (strcmp(argv
[1], "-v") == 0 || 
11433             strcmp(argv
[1], "--version") == 0) version(); 
11434         if (strcmp(argv
[1], "--help") == 0) usage(); 
/* Any other argv[1] is handed to loadServerConfig() as the config source. */
11435         resetServerSaveParams(); 
11436         loadServerConfig(argv
[1]); 
11437     } else if ((argc 
> 2)) { 
11440         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
11442     if (server
.daemonize
) daemonize(); 
11444     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
11446     linuxOvercommitMemoryWarning(); 
/* 'start' is used to log how many seconds the initial load took. */
11448     start 
= time(NULL
); 
11449     if (server
.appendonly
) { 
11450         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
11451             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
11453         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
11454             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
11456     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
11457     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
11459     aeDeleteEventLoop(server
.el
); 
11463 /* ============================= Backtrace support ========================= */ 
11465 #ifdef HAVE_BACKTRACE 
11466 static char *findFuncName(void *pointer
, unsigned long *offset
); 
/* Return, as a void pointer, the saved instruction-pointer field of the
 * machine context in 'uc'.
 *
 * The name and layout of that field differ between platforms, so the
 * member to read is selected with the preprocessor tests below (FreeBSD,
 * dietlibc, Apple before and from 10.6, Linux x86/x86-64, Linux IA64). */
11468 static void *getMcontextEip(ucontext_t 
*uc
) { 
11469 #if defined(__FreeBSD__) 
11470     return (void*) uc
->uc_mcontext
.mc_eip
; 
11471 #elif defined(__dietlibc__) 
11472     return (void*) uc
->uc_mcontext
.eip
; 
11473 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
11475     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
11477     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
11479 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
11480   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
11481     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
11483     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
11485 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
11486     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
11487 #elif defined(__ia64__) /* Linux IA64 */ 
11488     return (void*) uc
->uc_mcontext
.sc_ip
; 
/* Signal handler that logs a crash report.
 *
 * 'sig' is the signal number, 'info' is unused and 'secret' is interpreted
 * as the ucontext_t of the interrupted code. The handler logs the signal
 * and the Redis version, the string returned by genRedisInfoString(), and
 * then one line per backtrace frame. For each frame the name found by
 * findFuncName() is printed, unless no name is found or the '+offset' in
 * the backtrace_symbols() text is smaller than the offset findFuncName()
 * reported, in which case the backtrace_symbols() text is logged as is. */
11494 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
11496     char **messages 
= NULL
; 
11497     int i
, trace_size 
= 0; 
11498     unsigned long offset
=0; 
11499     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
11501     REDIS_NOTUSED(info
); 
11503     redisLog(REDIS_WARNING
, 
11504         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
11505     infostring 
= genRedisInfoString(); 
11506     redisLog(REDIS_WARNING
, "%s",infostring
); 
11507     /* It's not safe to sdsfree() the returned string under memory 
11508      * corruption conditions. Let it leak as we are going to abort */ 
11510     trace_size 
= backtrace(trace
, 100); 
11511     /* overwrite sigaction with caller's address */ 
/* NOTE(review): trace[1] is written without checking that trace_size is
 * at least 2. */
11512     if (getMcontextEip(uc
) != NULL
) { 
11513         trace
[1] = getMcontextEip(uc
); 
/* NOTE(review): 'messages' is indexed in the loop below without a visible
 * check that backtrace_symbols() returned a non-NULL array. */
11515     messages 
= backtrace_symbols(trace
, trace_size
); 
/* Frame 0 is skipped: the loop starts at index 1. */
11517     for (i
=1; i
<trace_size
; ++i
) { 
11518         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
11520         p 
= strchr(messages
[i
],'+'); 
11521         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
11522             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
11524             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
11527     /* free(messages); Don't call free() with possibly corrupted memory. */ 
11531 static void sigtermHandler(int sig
) { 
11532     REDIS_NOTUSED(sig
); 
11534     redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down..."); 
11535     server
.shutdown_asap 
= 1; 
11538 static void setupSigSegvAction(void) { 
11539     struct sigaction act
; 
11541     sigemptyset (&act
.sa_mask
); 
11542     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
11543      * is used. Otherwise, sa_handler is used */ 
11544     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
11545     act
.sa_sigaction 
= segvHandler
; 
11546     sigaction (SIGSEGV
, &act
, NULL
); 
11547     sigaction (SIGBUS
, &act
, NULL
); 
11548     sigaction (SIGFPE
, &act
, NULL
); 
11549     sigaction (SIGILL
, &act
, NULL
); 
11550     sigaction (SIGBUS
, &act
, NULL
); 
11552     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND
; 
11553     act
.sa_handler 
= sigtermHandler
; 
11554     sigaction (SIGTERM
, &act
, NULL
); 
11558 #include "staticsymbols.h" 
11559 /* This function try to convert a pointer into a function name. It's used in 
11560  * oreder to provide a backtrace under segmentation fault that's able to 
11561  * display functions declared as static (otherwise the backtrace is useless). */ 
11562 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
11564     unsigned long off
, minoff 
= 0; 
11566     /* Try to match against the Symbol with the smallest offset */ 
11567     for (i
=0; symsTable
[i
].pointer
; i
++) { 
11568         unsigned long lp 
= (unsigned long) pointer
; 
11570         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
11571             off
=lp
-symsTable
[i
].pointer
; 
11572             if (ret 
< 0 || off 
< minoff
) { 
11578     if (ret 
== -1) return NULL
; 
11580     return symsTable
[ret
].name
; 
11582 #else /* HAVE_BACKTRACE */ 
/* Variant used when HAVE_BACKTRACE is not defined: does nothing. */
static void setupSigSegvAction(void) {
}
11585 #endif /* HAVE_BACKTRACE */