2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "2.1.1" 
  45 #endif /* HAVE_BACKTRACE */ 
  53 #include <arpa/inet.h> 
  57 #include <sys/resource.h> 
  65 #include "solarisfixes.h" 
  69 #include "ae.h"     /* Event driven programming library */ 
  70 #include "sds.h"    /* Dynamic safe strings */ 
  71 #include "anet.h"   /* Networking the easy way */ 
  72 #include "dict.h"   /* Hash tables */ 
  73 #include "adlist.h" /* Linked lists */ 
  74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  75 #include "lzf.h"    /* LZF compression library */ 
  76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  77 #include "zipmap.h" /* Compact dictionary-alike data structure */ 
  78 #include "ziplist.h" /* Compact list data structure */ 
  79 #include "sha1.h"   /* SHA1 is used for DEBUG DIGEST */ 
  80 #include "release.h" /* Release and/or git repository information */ 
  86 /* Static server configuration */ 
  87 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  88 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  89 #define REDIS_IOBUF_LEN         1024 
  90 #define REDIS_LOADBUF_LEN       1024 
  91 #define REDIS_STATIC_ARGS       8 
  92 #define REDIS_DEFAULT_DBNUM     16 
  93 #define REDIS_CONFIGLINE_MAX    1024 
  94 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  95 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  96 #define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* lookup 10 expires per loop */ 
  97 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  98 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
 100 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
 101 #define REDIS_WRITEV_THRESHOLD      3 
 102 /* Max number of iovecs used for each writev call */ 
 103 #define REDIS_WRITEV_IOVEC_COUNT    256 
 105 /* Hash table parameters */ 
 106 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 109 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 110 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 111 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 112    this flag will return an error when the 'maxmemory' option is set in the 
 113    config file and the server is using more than maxmemory bytes of memory. 
 114    In short these commands are denied on low memory conditions. */ 
 115 #define REDIS_CMD_DENYOOM       4 
 116 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ 
 119 #define REDIS_STRING 0 
 124 #define REDIS_VMPOINTER 8 
 126 /* Object encodings. Some kinds of objects, like Strings and Hashes, can be 
 127  * internally represented in multiple ways. The 'encoding' field of the object 
 128  * is set to one of these values for this object. */ 
 129 #define REDIS_ENCODING_RAW 0     /* Raw representation */ 
 130 #define REDIS_ENCODING_INT 1     /* Encoded as integer */ 
 131 #define REDIS_ENCODING_HT 2      /* Encoded as hash table */ 
 132 #define REDIS_ENCODING_ZIPMAP 3  /* Encoded as zipmap */ 
 133 #define REDIS_ENCODING_LIST 4    /* Encoded as linked list */ 
 134 #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ 
 136 static char* strencoding
[] = { 
 137     "raw", "int", "hashtable", "zipmap", "list", "ziplist" 
 140 /* Object types only used for dumping to disk */ 
 141 #define REDIS_EXPIRETIME 253 
 142 #define REDIS_SELECTDB 254 
 143 #define REDIS_EOF 255 
 145 /* Defines related to the dump file format. To store 32 bits lengths for short 
 146  * keys requires a lot of space, so we check the most significant 2 bits of 
 147  * the first byte to interpret the length: 
 149  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 150  * 01|000000 00000000 => if it's 01, the len is 14 bits, 6 bits + 8 bits of next byte 
 151  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 152  * 11|000000 this means: specially encoded object will follow. The six bits 
 153  *           number specifies the kind of object that follows. 
 154  *           See the REDIS_RDB_ENC_* defines. 
 156  * Lengths up to 63 are stored using a single byte, most DB keys, and many 
 157  * values, will fit inside. */ 
 158 #define REDIS_RDB_6BITLEN 0 
 159 #define REDIS_RDB_14BITLEN 1 
 160 #define REDIS_RDB_32BITLEN 2 
 161 #define REDIS_RDB_ENCVAL 3 
 162 #define REDIS_RDB_LENERR UINT_MAX 
 164 /* When a length of a string object stored on disk has the first two bits 
 165  * set, the remaining six bits specify a special encoding for the object 
 166  * according to the following defines: */ 
 167 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 168 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 169 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 170 #define REDIS_RDB_ENC_LZF 3         /* string compressed with LZF */ 
 172 /* Virtual memory object->where field. */ 
 173 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 174 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 175 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 176 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 178 /* Virtual memory static configuration stuff. 
 179  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 180 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 181 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 182 #define REDIS_VM_MAX_THREADS 32 
 183 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 184 /* The following is the *percentage* of completed I/O jobs to process when the 
 185  * handler is called. While Virtual Memory I/O operations are performed by 
 186  * threads, these operations must be processed by the main thread when completed 
 187  * in order to take effect. */ 
 188 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 191 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 192 #define REDIS_MASTER 2      /* This client is a master server */ 
 193 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 194 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 195 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 196 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 197 #define REDIS_DIRTY_CAS 64  /* Watched keys modified. EXEC will fail. */ 
 199 /* Slave replication state - slave side */ 
 200 #define REDIS_REPL_NONE 0   /* No active replication */ 
 201 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 202 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 204 /* Slave replication state - from the point of view of master 
 205  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 206  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 207  * to start the next background saving in order to send updates to it. */ 
 208 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 209 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 210 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 211 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 213 /* List related stuff */ 
 217 /* Sort operations */ 
 218 #define REDIS_SORT_GET 0 
 219 #define REDIS_SORT_ASC 1 
 220 #define REDIS_SORT_DESC 2 
 221 #define REDIS_SORTKEY_MAX 1024 
 224 #define REDIS_DEBUG 0 
 225 #define REDIS_VERBOSE 1 
 226 #define REDIS_NOTICE 2 
 227 #define REDIS_WARNING 3 
 229 /* Anti-warning macro... */ 
 230 #define REDIS_NOTUSED(V) ((void) V) 
 232 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 233 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 235 /* Append only defines */ 
 236 #define APPENDFSYNC_NO 0 
 237 #define APPENDFSYNC_ALWAYS 1 
 238 #define APPENDFSYNC_EVERYSEC 2 
 240 /* Zip structure related defaults */ 
 241 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 242 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 243 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024 
 244 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32 
 246 /* We can print the stacktrace, so our assert is defined this way: 
      * redisAssert(_e) evaluates _e; when it is false it calls _redisAssert() 
      * with the stringified expression, file and line, then _exit(1). 
      * redisPanic(_e) unconditionally calls _redisPanic() with the stringified 
      * argument, file and line, then _exit(1). Both expansions are wrapped in 
      * parentheses so each macro behaves as a single expression at the call site. */ 
 247 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
 248 #define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1)) 
 249 static void _redisAssert(char *estr
, char *file
, int line
); 
 250 static void _redisPanic(char *msg
, char *file
, int line
); 
 252 /*================================= Data types ============================== */ 
 254 /* A redis object, that is a type able to hold a string / list / set */ 
 256 /* The actual Redis Object */ 
 257 typedef struct redisObject 
{ 
 259     unsigned storage
:2;     /* 2 bits: REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ 
 261     unsigned lru
:22;        /* 22 bits: lru time (relative to server.lruclock) */ 
 264     /* VM fields, these are only allocated if VM is active, otherwise the 
 265      * object allocation function will just allocate 
 266      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 267      * Redis without VM active will not have any overhead. */ 
 270 /* The VM pointer structure - identifies an object in the swap file. 
 272  * This object is stored in place of the value 
 273  * object in the main key->value hash table representing a database. 
 274  * Note that the first fields (type, storage) are the same as the redisObject 
 275  * structure so that vmPointer structures can be accessed even when cast 
 276  * to redisObject structures. 
 278  * This is useful as we don't know if a value object is on disk or not, but we 
 279  * are always able to read obj->storage to check this. For vmPointer 
 280  * structures "type" is set to REDIS_VMPOINTER (even if without this field 
 281  * it is still possible to check the kind of object from the value of 'storage').*/ 
 282 typedef struct vmPointer 
{ 
 284     unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
 286     unsigned int vtype
; /* type of the object stored in the swap file */ 
 287     off_t page
;         /* the page at which the object is stored on disk */ 
 288     off_t usedpages
;    /* number of pages used on disk */ 
 291 /* Macro used to initialize a Redis object allocated on the stack. 
 292  * Note that this macro is kept near the structure definition to make sure 
 293  * we'll update it when the structure is changed, to avoid bugs like 
 294  * bug #85 introduced exactly in this way. */ 
 295 #define initStaticStringObject(_var,_ptr) do { \ 
 297     _var.type = REDIS_STRING; \ 
 298     _var.encoding = REDIS_ENCODING_RAW; \ 
 300     _var.storage = REDIS_VM_MEMORY; \ 
 303 typedef struct redisDb 
{ 
 304     dict 
*dict
;                 /* The keyspace for this DB */ 
 305     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 306     dict 
*blocking_keys
;        /* Keys with clients waiting for data (BLPOP) */ 
 307     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 308     dict 
*watched_keys
;         /* WATCHED keys for MULTI/EXEC CAS */ 
 312 /* Client MULTI/EXEC state */ 
 313 typedef struct multiCmd 
{ 
 316     struct redisCommand 
*cmd
; 
 319 typedef struct multiState 
{ 
 320     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 321     int count
;              /* Total number of MULTI commands */ 
 324 /* With multiplexing we need to keep per-client state. 
 325  * Clients are kept in a linked list. */ 
 326 typedef struct redisClient 
{ 
 331     robj 
**argv
, **mbargv
; 
 333     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 334     int multibulk
;          /* multi bulk command format active */ 
 337     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 338     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 339     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 340     int authenticated
;      /* when requirepass is non-NULL */ 
 341     int replstate
;          /* replication state if this is a slave */ 
 342     int repldbfd
;           /* replication DB file descriptor */ 
 343     long repldboff
;         /* replication DB file offset */ 
 344     off_t repldbsize
;       /* replication DB file size */ 
 345     multiState mstate
;      /* MULTI/EXEC state */ 
 346     robj 
**blocking_keys
;   /* The key we are waiting to terminate a blocking 
 347                              * operation such as BLPOP. Otherwise NULL. */ 
 348     int blocking_keys_num
;  /* Number of blocking keys */ 
 349     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 350                              * is >= blockingto then the operation timed out. */ 
 351     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 352                              * swap file in order to continue. */ 
 353     list 
*watched_keys
;     /* Keys WATCHED for MULTI/EXEC CAS */ 
 354     dict 
*pubsub_channels
;  /* channels a client is interested in (SUBSCRIBE) */ 
 355     list 
*pubsub_patterns
;  /* patterns a client is interested in (SUBSCRIBE) */ 
 363 /* Global server state structure */ 
 368     long long dirty
;            /* changes to DB from the last save */ 
 370     list 
*slaves
, *monitors
; 
 371     char neterr
[ANET_ERR_LEN
]; 
 373     int cronloops
;              /* number of times the cron function run */ 
 374     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 375     time_t lastsave
;            /* Unix time of the last successful save */ 
 376     /* Fields used only for stats */ 
 377     time_t stat_starttime
;         /* server start time */ 
 378     long long stat_numcommands
;    /* number of processed commands */ 
 379     long long stat_numconnections
; /* number of connections received */ 
 380     long long stat_expiredkeys
;   /* number of expired keys */ 
 389     int no_appendfsync_on_rewrite
; 
 395     pid_t bgsavechildpid
; 
 396     pid_t bgrewritechildpid
; 
 397     sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */ 
 398     sds aofbuf
;       /* AOF buffer, written before entering the event loop */ 
 399     struct saveparam 
*saveparams
; 
 404     char *appendfilename
; 
 408     /* Replication related */ 
 413     redisClient 
*master
;    /* client that is master for this slave */ 
 415     unsigned int maxclients
; 
 416     unsigned long long maxmemory
; 
 417     unsigned int blpop_blocked_clients
; 
 418     unsigned int vm_blocked_clients
; 
 419     /* Sort parameters - qsort_r() is only available under BSD so we 
 420      * have to keep this state global, in order to pass it to sortCompare() */ 
 424     /* Virtual memory configuration */ 
 429     unsigned long long vm_max_memory
; 
 430     /* Zip structure config */ 
 431     size_t hash_max_zipmap_entries
; 
 432     size_t hash_max_zipmap_value
; 
 433     size_t list_max_ziplist_entries
; 
 434     size_t list_max_ziplist_value
; 
 435     /* Virtual memory state */ 
 438     off_t vm_next_page
; /* Next probably empty page */ 
 439     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 440     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 441     time_t unixtime
;    /* Unix time sampled every second. */ 
 442     /* Virtual memory I/O threads stuff */ 
 443     /* An I/O thread processes an element taken from the io_newjobs queue and 
 444      * puts the result of the operation in the io_processed list. While the 
 445      * job is being processed, it's put on the io_processing queue. */ 
 446     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 447     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 448     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 449     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 450     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 451     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 452     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 453     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 454     int io_active_threads
; /* Number of running I/O threads */ 
 455     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 456     /* Our main thread is blocked on the event loop, looking for sockets ready 
 457      * to be read or written, so when a threaded I/O operation is ready to be 
 458      * processed by the main thread, the I/O thread will use a unix pipe to 
 459      * wake up the main thread. The following are the two pipe FDs. */ 
 460     int io_ready_pipe_read
; 
 461     int io_ready_pipe_write
; 
 462     /* Virtual memory stats */ 
 463     unsigned long long vm_stats_used_pages
; 
 464     unsigned long long vm_stats_swapped_objects
; 
 465     unsigned long long vm_stats_swapouts
; 
 466     unsigned long long vm_stats_swapins
; 
 468     dict 
*pubsub_channels
; /* Map channels to list of subscribed clients */ 
 469     list 
*pubsub_patterns
; /* A list of pubsub_patterns */ 
 472     unsigned lruclock
:22;        /* clock incrementing every minute, for LRU */ 
 473     unsigned lruclock_padding
:10; 
 476 typedef struct pubsubPattern 
{ 
 481 typedef void redisCommandProc(redisClient 
*c
); 
 482 typedef void redisVmPreloadProc(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 483 struct redisCommand 
{ 
 485     redisCommandProc 
*proc
; 
 488     /* Use a function to determine which keys need to be loaded 
 489      * in the background prior to executing this command. Takes precedence 
 490      * over vm_firstkey and others, ignored when NULL */ 
 491     redisVmPreloadProc 
*vm_preload_proc
; 
 492     /* What keys should be loaded in background when calling this command? */ 
 493     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 494     int vm_lastkey
;  /* The last argument that's a key */ 
 495     int vm_keystep
;  /* The step between first and last key */ 
 498 struct redisFunctionSym 
{ 
 500     unsigned long pointer
; 
 503 typedef struct _redisSortObject 
{ 
 511 typedef struct _redisSortOperation 
{ 
 514 } redisSortOperation
; 
 516 /* ZSETs use a specialized version of Skiplists */ 
 518 typedef struct zskiplistNode 
{ 
 519     struct zskiplistNode 
**forward
; 
 520     struct zskiplistNode 
*backward
; 
 526 typedef struct zskiplist 
{ 
 527     struct zskiplistNode 
*header
, *tail
; 
 528     unsigned long length
; 
 532 typedef struct zset 
{ 
 537 /* Our shared "common" objects */ 
 539 #define REDIS_SHARED_INTEGERS 10000 
 540 struct sharedObjectsStruct 
{ 
 541     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 542     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 543     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 544     *outofrangeerr
, *plus
, 
 545     *select0
, *select1
, *select2
, *select3
, *select4
, 
 546     *select5
, *select6
, *select7
, *select8
, *select9
, 
 547     *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
, 
 548     *mbulk4
, *psubscribebulk
, *punsubscribebulk
, 
 549     *integers
[REDIS_SHARED_INTEGERS
]; 
 552 /* Global vars that are actually used as constants. The following double 
 553  * values are used for double on-disk serialization, and are initialized 
 554  * at runtime to avoid strange compiler optimizations. */ 
 556 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 558 /* VM threaded I/O request message */ 
 559 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 560 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 561 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 562 typedef struct iojob 
{ 
 563     int type
;   /* Request type, REDIS_IOJOB_* */ 
 564     redisDb 
*db
;/* Redis database */ 
 565     robj 
*key
;  /* This I/O request is about swapping this key */ 
 566     robj 
*id
;   /* Unique identifier of this job: 
 567                    this is the object to swap for REDIS_IOJOB_*_SWAP, or the 
 568                    vmpointer object for REDIS_IOJOB_LOAD. */ 
 569     robj 
*val
;  /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this 
 570                  * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */ 
 571     off_t page
; /* Swap page where to read/write the object */ 
 572     off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */ 
 573     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 574     pthread_t thread
; /* ID of the thread processing this entry */ 
 577 /*================================ Prototypes =============================== */ 
 579 static void freeStringObject(robj 
*o
); 
 580 static void freeListObject(robj 
*o
); 
 581 static void freeSetObject(robj 
*o
); 
 582 static void decrRefCount(void *o
); 
 583 static robj 
*createObject(int type
, void *ptr
); 
 584 static void freeClient(redisClient 
*c
); 
 585 static int rdbLoad(char *filename
); 
 586 static void addReply(redisClient 
*c
, robj 
*obj
); 
 587 static void addReplySds(redisClient 
*c
, sds s
); 
 588 static void incrRefCount(robj 
*o
); 
 589 static int rdbSaveBackground(char *filename
); 
 590 static robj 
*createStringObject(char *ptr
, size_t len
); 
 591 static robj 
*dupStringObject(robj 
*o
); 
 592 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
); 
 593 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
); 
 594 static void flushAppendOnlyFile(void); 
 595 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 596 static int syncWithMaster(void); 
 597 static robj 
*tryObjectEncoding(robj 
*o
); 
 598 static robj 
*getDecodedObject(robj 
*o
); 
 599 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 600 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 601 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 602 static int dbDelete(redisDb 
*db
, robj 
*key
); 
 603 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 604 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 605 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 606 static void freeMemoryIfNeeded(void); 
 607 static int processCommand(redisClient 
*c
); 
 608 static void setupSigSegvAction(void); 
 609 static void rdbRemoveTempFile(pid_t childpid
); 
 610 static void aofRemoveTempFile(pid_t childpid
); 
 611 static size_t stringObjectLen(robj 
*o
); 
 612 static void processInputBuffer(redisClient 
*c
); 
 613 static zskiplist 
*zslCreate(void); 
 614 static void zslFree(zskiplist 
*zsl
); 
 615 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 616 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 617 static void initClientMultiState(redisClient 
*c
); 
 618 static void freeClientMultiState(redisClient 
*c
); 
 619 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 620 static void unblockClientWaitingData(redisClient 
*c
); 
 621 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 622 static void vmInit(void); 
 623 static void vmMarkPagesFree(off_t page
, off_t count
); 
 624 static robj 
*vmLoadObject(robj 
*o
); 
 625 static robj 
*vmPreviewObject(robj 
*o
); 
 626 static int vmSwapOneObjectBlocking(void); 
 627 static int vmSwapOneObjectThreaded(void); 
 628 static int vmCanSwapOut(void); 
 629 static int tryFreeOneObjectFromFreelist(void); 
 630 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 631 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 632 static void vmCancelThreadedIOJob(robj 
*o
); 
 633 static void lockThreadedIO(void); 
 634 static void unlockThreadedIO(void); 
 635 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 636 static void freeIOJob(iojob 
*j
); 
 637 static void queueIOJob(iojob 
*j
); 
 638 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 639 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 640 static void waitEmptyIOJobsQueue(void); 
 641 static void vmReopenSwapFile(void); 
 642 static int vmFreePage(off_t page
); 
 643 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 644 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 645 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
); 
 646 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 647 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 648 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 649 static struct redisCommand 
*lookupCommand(char *name
); 
 650 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 651 static void resetClient(redisClient 
*c
); 
 652 static void convertToRealHash(robj 
*o
); 
 653 static void listTypeConvert(robj 
*o
, int enc
); 
 654 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
); 
 655 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
); 
 656 static void freePubsubPattern(void *p
); 
 657 static int listMatchPubsubPattern(void *a
, void *b
); 
 658 static int compareStringObjects(robj 
*a
, robj 
*b
); 
 659 static int equalStringObjects(robj 
*a
, robj 
*b
); 
 661 static int rewriteAppendOnlyFileBackground(void); 
 662 static vmpointer 
*vmSwapObjectBlocking(robj 
*val
); 
 663 static int prepareForShutdown(void); /* takes no arguments; (void) makes this a real prototype */ 
 664 static void touchWatchedKey(redisDb 
*db
, robj 
*key
); 
 665 static void touchWatchedKeysOnFlush(int dbid
); 
 666 static void unwatchAllKeys(redisClient 
*c
); 
 668 static void authCommand(redisClient 
*c
); 
 669 static void pingCommand(redisClient 
*c
); 
 670 static void echoCommand(redisClient 
*c
); 
 671 static void setCommand(redisClient 
*c
); 
 672 static void setnxCommand(redisClient 
*c
); 
 673 static void setexCommand(redisClient 
*c
); 
 674 static void getCommand(redisClient 
*c
); 
 675 static void delCommand(redisClient 
*c
); 
 676 static void existsCommand(redisClient 
*c
); 
 677 static void incrCommand(redisClient 
*c
); 
 678 static void decrCommand(redisClient 
*c
); 
 679 static void incrbyCommand(redisClient 
*c
); 
 680 static void decrbyCommand(redisClient 
*c
); 
 681 static void selectCommand(redisClient 
*c
); 
 682 static void randomkeyCommand(redisClient 
*c
); 
 683 static void keysCommand(redisClient 
*c
); 
 684 static void dbsizeCommand(redisClient 
*c
); 
 685 static void lastsaveCommand(redisClient 
*c
); 
 686 static void saveCommand(redisClient 
*c
); 
 687 static void bgsaveCommand(redisClient 
*c
); 
 688 static void bgrewriteaofCommand(redisClient 
*c
); 
 689 static void shutdownCommand(redisClient 
*c
); 
 690 static void moveCommand(redisClient 
*c
); 
 691 static void renameCommand(redisClient 
*c
); 
 692 static void renamenxCommand(redisClient 
*c
); 
 693 static void lpushCommand(redisClient 
*c
); 
 694 static void rpushCommand(redisClient 
*c
); 
 695 static void lpopCommand(redisClient 
*c
); 
 696 static void rpopCommand(redisClient 
*c
); 
 697 static void llenCommand(redisClient 
*c
); 
 698 static void lindexCommand(redisClient 
*c
); 
 699 static void lrangeCommand(redisClient 
*c
); 
 700 static void ltrimCommand(redisClient 
*c
); 
 701 static void typeCommand(redisClient 
*c
); 
 702 static void lsetCommand(redisClient 
*c
); 
 703 static void saddCommand(redisClient 
*c
); 
 704 static void sremCommand(redisClient 
*c
); 
 705 static void smoveCommand(redisClient 
*c
); 
 706 static void sismemberCommand(redisClient 
*c
); 
 707 static void scardCommand(redisClient 
*c
); 
 708 static void spopCommand(redisClient 
*c
); 
 709 static void srandmemberCommand(redisClient 
*c
); 
 710 static void sinterCommand(redisClient 
*c
); 
 711 static void sinterstoreCommand(redisClient 
*c
); 
 712 static void sunionCommand(redisClient 
*c
); 
 713 static void sunionstoreCommand(redisClient 
*c
); 
 714 static void sdiffCommand(redisClient 
*c
); 
 715 static void sdiffstoreCommand(redisClient 
*c
); 
 716 static void syncCommand(redisClient 
*c
); 
 717 static void flushdbCommand(redisClient 
*c
); 
 718 static void flushallCommand(redisClient 
*c
); 
 719 static void sortCommand(redisClient 
*c
); 
 720 static void lremCommand(redisClient 
*c
); 
 721 static void rpoplpushcommand(redisClient 
*c
); 
 722 static void infoCommand(redisClient 
*c
); 
 723 static void mgetCommand(redisClient 
*c
); 
 724 static void monitorCommand(redisClient 
*c
); 
 725 static void expireCommand(redisClient 
*c
); 
 726 static void expireatCommand(redisClient 
*c
); 
 727 static void getsetCommand(redisClient 
*c
); 
 728 static void ttlCommand(redisClient 
*c
); 
 729 static void slaveofCommand(redisClient 
*c
); 
 730 static void debugCommand(redisClient 
*c
); 
 731 static void msetCommand(redisClient 
*c
); 
 732 static void msetnxCommand(redisClient 
*c
); 
 733 static void zaddCommand(redisClient 
*c
); 
 734 static void zincrbyCommand(redisClient 
*c
); 
 735 static void zrangeCommand(redisClient 
*c
); 
 736 static void zrangebyscoreCommand(redisClient 
*c
); 
 737 static void zcountCommand(redisClient 
*c
); 
 738 static void zrevrangeCommand(redisClient 
*c
); 
 739 static void zcardCommand(redisClient 
*c
); 
 740 static void zremCommand(redisClient 
*c
); 
 741 static void zscoreCommand(redisClient 
*c
); 
 742 static void zremrangebyscoreCommand(redisClient 
*c
); 
 743 static void multiCommand(redisClient 
*c
); 
 744 static void execCommand(redisClient 
*c
); 
 745 static void discardCommand(redisClient 
*c
); 
 746 static void blpopCommand(redisClient 
*c
); 
 747 static void brpopCommand(redisClient 
*c
); 
 748 static void appendCommand(redisClient 
*c
); 
 749 static void substrCommand(redisClient 
*c
); 
 750 static void zrankCommand(redisClient 
*c
); 
 751 static void zrevrankCommand(redisClient 
*c
); 
 752 static void hsetCommand(redisClient 
*c
); 
 753 static void hsetnxCommand(redisClient 
*c
); 
 754 static void hgetCommand(redisClient 
*c
); 
 755 static void hmsetCommand(redisClient 
*c
); 
 756 static void hmgetCommand(redisClient 
*c
); 
 757 static void hdelCommand(redisClient 
*c
); 
 758 static void hlenCommand(redisClient 
*c
); 
 759 static void zremrangebyrankCommand(redisClient 
*c
); 
 760 static void zunionstoreCommand(redisClient 
*c
); 
 761 static void zinterstoreCommand(redisClient 
*c
); 
 762 static void hkeysCommand(redisClient 
*c
); 
 763 static void hvalsCommand(redisClient 
*c
); 
 764 static void hgetallCommand(redisClient 
*c
); 
 765 static void hexistsCommand(redisClient 
*c
); 
 766 static void configCommand(redisClient 
*c
); 
 767 static void hincrbyCommand(redisClient 
*c
); 
 768 static void subscribeCommand(redisClient 
*c
); 
 769 static void unsubscribeCommand(redisClient 
*c
); 
 770 static void psubscribeCommand(redisClient 
*c
); 
 771 static void punsubscribeCommand(redisClient 
*c
); 
 772 static void publishCommand(redisClient 
*c
); 
 773 static void watchCommand(redisClient 
*c
); 
 774 static void unwatchCommand(redisClient 
*c
); 
 776 /*================================= Globals ================================= */ 
 779 static struct redisServer server
; /* server global state */ 
 780 static struct redisCommand 
*commandTable
; 
 781 static struct redisCommand readonlyCommandTable
[] = { 
 782     {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 783     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 784     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 785     {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 786     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 787     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 788     {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 789     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 790     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 791     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 792     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1}, 
 793     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 794     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 795     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 796     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 797     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 798     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 799     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 800     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 801     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 802     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 803     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 804     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 805     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1}, 
 806     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 807     {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 808     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1}, 
 809     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 810     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 811     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 812     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 813     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 814     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 815     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 816     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 817     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 818     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 819     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 820     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 821     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 822     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 823     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 824     {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 825     {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 826     {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 827     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 828     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 829     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 830     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 831     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 832     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 833     {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 834     {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 835     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 836     {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 837     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 838     {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 839     {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 840     {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 841     {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 842     {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 843     {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 844     {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 845     {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 846     {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 847     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 848     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 849     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 850     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 851     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 852     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 853     {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 854     {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 855     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 856     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 857     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 858     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 859     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 860     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 861     {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 862     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 863     {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 864     {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 865     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 866     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 867     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 868     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 869     {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 870     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 871     {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0}, 
 872     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 873     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 874     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 875     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 876     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 877     {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 878     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 879     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 880     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 881     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 882     {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 883     {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 884     {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 885     {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 886     {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 887     {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0}, 
 888     {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 889     {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0} 
 892 /*============================ Utility functions ============================ */ 
 894 /* Glob-style pattern matching. */ 
 895 static int stringmatchlen(const char *pattern
, int patternLen
, 
 896         const char *string
, int stringLen
, int nocase
) 
 901             while (pattern
[1] == '*') { 
 906                 return 1; /* match */ 
 908                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 909                             string
, stringLen
, nocase
)) 
 910                     return 1; /* match */ 
 914             return 0; /* no match */ 
 918                 return 0; /* no match */ 
 928             not = pattern
[0] == '^'; 
 935                 if (pattern
[0] == '\\') { 
 938                     if (pattern
[0] == string
[0]) 
 940                 } else if (pattern
[0] == ']') { 
 942                 } else if (patternLen 
== 0) { 
 946                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 947                     int start 
= pattern
[0]; 
 948                     int end 
= pattern
[2]; 
 956                         start 
= tolower(start
); 
 962                     if (c 
>= start 
&& c 
<= end
) 
 966                         if (pattern
[0] == string
[0]) 
 969                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 979                 return 0; /* no match */ 
 985             if (patternLen 
>= 2) { 
 992                 if (pattern
[0] != string
[0]) 
 993                     return 0; /* no match */ 
 995                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 996                     return 0; /* no match */ 
1004         if (stringLen 
== 0) { 
1005             while(*pattern 
== '*') { 
1012     if (patternLen 
== 0 && stringLen 
== 0) 
1017 static int stringmatch(const char *pattern
, const char *string
, int nocase
) { 
1018     return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
); 
/* Convert a string representing an amount of memory into the number of
 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
 * (1024*1024*1024).
 *
 * The accepted syntax is an optional leading '-', a run of decimal digits,
 * and an optional unit that is matched case insensitively:
 *
 *   (no unit) or "b"  ->  1
 *   "k"  -> 1000              "kb" -> 1024
 *   "m"  -> 1000*1000         "mb" -> 1024*1024
 *   "g"  -> 1000*1000*1000    "gb" -> 1024*1024*1024
 *
 * If err is not NULL, *err is set to 0 on success and to 1 on parsing error.
 * An unknown unit is a parsing error: the numeric part is still converted
 * and returned with a multiplier of 1. If the numeric part does not fit the
 * internal buffer LLONG_MAX is returned. */
static long long memtoll(const char *p, int *err) {
    const char *u;
    char buf[128];
    long mul; /* unit multiplier */
    long long val;
    unsigned int digits;

    if (err) *err = 0;
    /* Search the first non digit character. */
    u = p;
    if (*u == '-') u++;
    /* isdigit() is only defined for EOF and unsigned char values, so bytes
     * with the high bit set must not reach it as negative ints. */
    while(*u && isdigit((unsigned char)*u)) u++;
    if (*u == '\0' || !strcasecmp(u,"b")) {
        mul = 1;
    } else if (!strcasecmp(u,"k")) {
        mul = 1000;
    } else if (!strcasecmp(u,"kb")) {
        mul = 1024;
    } else if (!strcasecmp(u,"m")) {
        mul = 1000*1000;
    } else if (!strcasecmp(u,"mb")) {
        mul = 1024*1024;
    } else if (!strcasecmp(u,"g")) {
        mul = 1000L*1000*1000;
    } else if (!strcasecmp(u,"gb")) {
        mul = 1024L*1024*1024;
    } else {
        if (err) *err = 1;
        mul = 1;
    }
    /* Copy the sign and digits (everything before the unit) into a nul
     * terminated buffer so that strtoll() never looks at the unit. */
    digits = u-p;
    if (digits >= sizeof(buf)) {
        if (err) *err = 1;
        return LLONG_MAX;
    }
    memcpy(buf,p,digits);
    buf[digits] = '\0';
    val = strtoll(buf,NULL,10);
    return val*mul;
}
/* Convert a long long into a nul terminated decimal string stored in 's',
 * a buffer of 'len' bytes.
 *
 * Returns the number of characters written, not counting the nul term.
 * When the buffer is too small the representation is truncated to its
 * first len-1 characters, so the return value can be shorter than the
 * number of characters needed to represent the whole number. With a zero
 * length buffer nothing is written and 0 is returned. */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Take the absolute value in the unsigned domain: -value is a signed
     * overflow (undefined behavior) when value is LLONG_MIN. */
    v = (value < 0) ? 0ULL-(unsigned long long)value
                    : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    /* Emit the digits backward, from the least significant one. */
    do {
        *p-- = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++;
    l = 32-(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return (int)l;
}
1092 static void redisLog(int level
, const char *fmt
, ...) { 
1096     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
1100     if (level 
>= server
.verbosity
) { 
1106         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
1107         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
1108         vfprintf(fp
, fmt
, ap
); 
1114     if (server
.logfile
) fclose(fp
); 
1117 /*====================== Hash table type implementation  ==================== */ 
1119 /* This is a hash table type that uses the SDS dynamic strings library as
1120  * keys and redis objects as values (objects can hold SDS strings,
1123 static void dictVanillaFree(void *privdata
, void *val
) 
1125     DICT_NOTUSED(privdata
); 
1129 static void dictListDestructor(void *privdata
, void *val
) 
1131     DICT_NOTUSED(privdata
); 
1132     listRelease((list
*)val
); 
1135 static int dictSdsKeyCompare(void *privdata
, const void *key1
, 
1139     DICT_NOTUSED(privdata
); 
1141     l1 
= sdslen((sds
)key1
); 
1142     l2 
= sdslen((sds
)key2
); 
1143     if (l1 
!= l2
) return 0; 
1144     return memcmp(key1
, key2
, l1
) == 0; 
1147 static void dictRedisObjectDestructor(void *privdata
, void *val
) 
1149     DICT_NOTUSED(privdata
); 
1151     if (val 
== NULL
) return; /* Values of swapped out keys as set to NULL */ 
1155 static void dictSdsDestructor(void *privdata
, void *val
) 
1157     DICT_NOTUSED(privdata
); 
1162 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
1165     const robj 
*o1 
= key1
, *o2 
= key2
; 
1166     return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1169 static unsigned int dictObjHash(const void *key
) { 
1170     const robj 
*o 
= key
; 
1171     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1174 static unsigned int dictSdsHash(const void *key
) { 
1175     return dictGenHashFunction((unsigned char*)key
, sdslen((char*)key
)); 
1178 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
1181     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
1184     if (o1
->encoding 
== REDIS_ENCODING_INT 
&& 
1185         o2
->encoding 
== REDIS_ENCODING_INT
) 
1186             return o1
->ptr 
== o2
->ptr
; 
1188     o1 
= getDecodedObject(o1
); 
1189     o2 
= getDecodedObject(o2
); 
1190     cmp 
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1196 static unsigned int dictEncObjHash(const void *key
) { 
1197     robj 
*o 
= (robj
*) key
; 
1199     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
1200         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1202         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1206             len 
= ll2string(buf
,32,(long)o
->ptr
); 
1207             return dictGenHashFunction((unsigned char*)buf
, len
); 
1211             o 
= getDecodedObject(o
); 
1212             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1220 static dictType setDictType 
= { 
1221     dictEncObjHash
,            /* hash function */ 
1224     dictEncObjKeyCompare
,      /* key compare */ 
1225     dictRedisObjectDestructor
, /* key destructor */ 
1226     NULL                       
/* val destructor */ 
1229 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1230 static dictType zsetDictType 
= { 
1231     dictEncObjHash
,            /* hash function */ 
1234     dictEncObjKeyCompare
,      /* key compare */ 
1235     dictRedisObjectDestructor
, /* key destructor */ 
1236     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1239 /* Db->dict, keys are sds strings, vals are Redis objects. */ 
1240 static dictType dbDictType 
= { 
1241     dictSdsHash
,                /* hash function */ 
1244     dictSdsKeyCompare
,          /* key compare */ 
1245     dictSdsDestructor
,          /* key destructor */ 
1246     dictRedisObjectDestructor   
/* val destructor */ 
1250 static dictType keyptrDictType 
= { 
1251     dictSdsHash
,               /* hash function */ 
1254     dictSdsKeyCompare
,         /* key compare */ 
1255     dictSdsDestructor
,         /* key destructor */ 
1256     NULL                       
/* val destructor */ 
1259 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1260 static dictType hashDictType 
= { 
1261     dictEncObjHash
,             /* hash function */ 
1264     dictEncObjKeyCompare
,       /* key compare */ 
1265     dictRedisObjectDestructor
,  /* key destructor */ 
1266     dictRedisObjectDestructor   
/* val destructor */ 
1269 /* Keylist hash table type has unencoded redis objects as keys and
1270  * lists as values. It's used for blocking operations (BLPOP) and to
1271  * map swapped keys to a list of clients waiting for these keys to be loaded. */
1272 static dictType keylistDictType 
= { 
1273     dictObjHash
,                /* hash function */ 
1276     dictObjKeyCompare
,          /* key compare */ 
1277     dictRedisObjectDestructor
,  /* key destructor */ 
1278     dictListDestructor          
/* val destructor */ 
1281 static void version(); 
1283 /* ========================= Random utility functions ======================= */ 
1285 /* Redis generally does not try to recover from out of memory conditions 
1286  * when allocating objects or strings, it is not clear if it will be possible 
1287  * to report this condition to the client since the networking layer itself 
1288  * is based on heap allocation for send buffers, so we simply abort. 
1289  * At least the code will be simpler to read... */ 
1290 static void oom(const char *msg
) { 
1291     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1296 /* ====================== Redis server networking stuff ===================== */ 
1297 static void closeTimedoutClients(void) { 
1300     time_t now 
= time(NULL
); 
1303     listRewind(server
.clients
,&li
); 
1304     while ((ln 
= listNext(&li
)) != NULL
) { 
1305         c 
= listNodeValue(ln
); 
1306         if (server
.maxidletime 
&& 
1307             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1308             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1309             dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */ 
1310             listLength(c
->pubsub_patterns
) == 0 && 
1311             (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1313             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1315         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1316             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1317                 addReply(c
,shared
.nullmultibulk
); 
1318                 unblockClientWaitingData(c
); 
1324 static int htNeedsResize(dict 
*dict
) { 
1325     long long size
, used
; 
1327     size 
= dictSlots(dict
); 
1328     used 
= dictSize(dict
); 
1329     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1330             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1333 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL 
1334  * we resize the hash table to save memory */ 
1335 static void tryResizeHashTables(void) { 
1338     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1339         if (htNeedsResize(server
.db
[j
].dict
)) 
1340             dictResize(server
.db
[j
].dict
); 
1341         if (htNeedsResize(server
.db
[j
].expires
)) 
1342             dictResize(server
.db
[j
].expires
); 
1346 /* Our hash table implementation performs rehashing incrementally while 
1347  * we write/read from the hash table. Still if the server is idle, the hash 
1348  * table will use two tables for a long time. So we try to use 1 millisecond 
1349  * of CPU time at every serverCron() loop in order to rehash some key. */ 
1350 static void incrementallyRehash(void) { 
1353     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1354         if (dictIsRehashing(server
.db
[j
].dict
)) { 
1355             dictRehashMilliseconds(server
.db
[j
].dict
,1); 
1356             break; /* already used our millisecond for this loop... */ 
1361 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
1362 void backgroundSaveDoneHandler(int statloc
) { 
1363     int exitcode 
= WEXITSTATUS(statloc
); 
1364     int bysignal 
= WIFSIGNALED(statloc
); 
1366     if (!bysignal 
&& exitcode 
== 0) { 
1367         redisLog(REDIS_NOTICE
, 
1368             "Background saving terminated with success"); 
1370         server
.lastsave 
= time(NULL
); 
1371     } else if (!bysignal 
&& exitcode 
!= 0) { 
1372         redisLog(REDIS_WARNING
, "Background saving error"); 
1374         redisLog(REDIS_WARNING
, 
1375             "Background saving terminated by signal %d", WTERMSIG(statloc
)); 
1376         rdbRemoveTempFile(server
.bgsavechildpid
); 
1378     server
.bgsavechildpid 
= -1; 
1379     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1380      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1381     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1384 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1386 void backgroundRewriteDoneHandler(int statloc
) { 
1387     int exitcode 
= WEXITSTATUS(statloc
); 
1388     int bysignal 
= WIFSIGNALED(statloc
); 
1390     if (!bysignal 
&& exitcode 
== 0) { 
1394         redisLog(REDIS_NOTICE
, 
1395             "Background append only file rewriting terminated with success"); 
1396         /* Now it's time to flush the differences accumulated by the parent */ 
1397         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1398         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1400             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1403         /* Flush our data... */ 
1404         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1405                 (signed) sdslen(server
.bgrewritebuf
)) { 
1406             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1410         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1411         /* Now our work is to rename the temp file into the stable file. And 
1412          * switch the file descriptor used by the server for append only. */ 
1413         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1414             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1418         /* Mission completed... almost */ 
1419         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1420         if (server
.appendfd 
!= -1) { 
1421             /* If append only is actually enabled... */ 
1422             close(server
.appendfd
); 
1423             server
.appendfd 
= fd
; 
1424             if (server
.appendfsync 
!= APPENDFSYNC_NO
) aof_fsync(fd
); 
1425             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1426             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1428             /* If append only is disabled we just generate a dump in this 
1429              * format. Why not? */ 
1432     } else if (!bysignal 
&& exitcode 
!= 0) { 
1433         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1435         redisLog(REDIS_WARNING
, 
1436             "Background append only file rewriting terminated by signal %d", 
1440     sdsfree(server
.bgrewritebuf
); 
1441     server
.bgrewritebuf 
= sdsempty(); 
1442     aofRemoveTempFile(server
.bgrewritechildpid
); 
1443     server
.bgrewritechildpid 
= -1; 
1446 /* This function is called once a background process of some kind terminates,
1447  * as we want to avoid resizing the hash tables when there is a child in order
1448  * to play well with copy-on-write (otherwise when a resize happens lots of
1449  * memory pages are copied). The goal of this function is to update the ability
1450  * for dict.c to resize the hash tables according to whether or not we have
1451  * running children. */
1452 static void updateDictResizePolicy(void) { 
1453     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) 
1456         dictDisableResize(); 
1459 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1460     int j
, loops 
= server
.cronloops
++; 
1461     REDIS_NOTUSED(eventLoop
); 
1463     REDIS_NOTUSED(clientData
); 
1465     /* We take a cached value of the unix time in the global state because
1466      * with virtual memory and aging there is a need to store the current time
1467      * in objects at every object access, and accuracy is not needed.
1468      * To access a global var is faster than calling time(NULL) */
1469     server
.unixtime 
= time(NULL
); 
1470     /* We have just 21 bits per object for LRU information. 
1471      * So we use an (eventually wrapping) LRU clock with minutes resolution. 
1473      * When we need to select what object to swap, we compute the minimum 
1474      * time distance between the current lruclock and the object last access 
1475      * lruclock info. Even if clocks will wrap on overflow, there is 
1476      * the interesting property that we are sure that at least 
1477      * ABS(A-B) minutes passed between current time and timestamp B. 
1479      * This is not precise, but we don't need precision at all, just
1480      * something statistically reasonable. 
1482     server
.lruclock 
= (time(NULL
)/60)&((1<<21)-1); 
1484     /* We received a SIGTERM, shutting down here in a safe way, as it is 
1485      * not ok doing so inside the signal handler. */ 
1486     if (server
.shutdown_asap
) { 
1487         if (prepareForShutdown() == REDIS_OK
) exit(0); 
1488         redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information"); 
1491     /* Show some info about non-empty databases */ 
1492     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1493         long long size
, used
, vkeys
; 
1495         size 
= dictSlots(server
.db
[j
].dict
); 
1496         used 
= dictSize(server
.db
[j
].dict
); 
1497         vkeys 
= dictSize(server
.db
[j
].expires
); 
1498         if (!(loops 
% 50) && (used 
|| vkeys
)) { 
1499             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1500             /* dictPrintStats(server.dict); */ 
1504     /* We don't want to resize the hash tables while a background saving
1505      * is in progress: the saving child is created using fork() that is 
1506      * implemented with a copy-on-write semantic in most modern systems, so 
1507      * if we resize the HT while there is the saving child at work actually 
1508      * a lot of memory movements in the parent will cause a lot of pages 
1510     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) { 
1511         if (!(loops 
% 10)) tryResizeHashTables(); 
1512         if (server
.activerehashing
) incrementallyRehash(); 
1515     /* Show information about connected clients */ 
1516     if (!(loops 
% 50)) { 
1517         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use", 
1518             listLength(server
.clients
)-listLength(server
.slaves
), 
1519             listLength(server
.slaves
), 
1520             zmalloc_used_memory()); 
1523     /* Close connections of timedout clients */ 
1524     if ((server
.maxidletime 
&& !(loops 
% 100)) || server
.blpop_blocked_clients
) 
1525         closeTimedoutClients(); 
1527     /* Check if a background saving or AOF rewrite in progress terminated */ 
1528     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1532         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1533             if (pid 
== server
.bgsavechildpid
) { 
1534                 backgroundSaveDoneHandler(statloc
); 
1536                 backgroundRewriteDoneHandler(statloc
); 
1538             updateDictResizePolicy(); 
1541         /* If there is not a background saving in progress check if 
1542          * we have to save now */ 
1543          time_t now 
= time(NULL
); 
1544          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1545             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1547             if (server
.dirty 
>= sp
->changes 
&& 
1548                 now
-server
.lastsave 
> sp
->seconds
) { 
1549                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1550                     sp
->changes
, sp
->seconds
); 
1551                 rdbSaveBackground(server
.dbfilename
); 
1557     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1558      * will use few CPU cycles if there are few expiring keys, otherwise 
1559      * it will get more aggressive to avoid that too much memory is used by 
1560      * keys that can be removed from the keyspace. */ 
1561     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1563         redisDb 
*db 
= server
.db
+j
; 
1565         /* Continue to expire if at the end of the cycle more than 25% 
1566          * of the keys were expired. */ 
1568             long num 
= dictSize(db
->expires
); 
1569             time_t now 
= time(NULL
); 
1572             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1573                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1578                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1579                 t 
= (time_t) dictGetEntryVal(de
); 
1581                     sds key 
= dictGetEntryKey(de
); 
1582                     robj 
*keyobj 
= createStringObject(key
,sdslen(key
)); 
1584                     dbDelete(db
,keyobj
); 
1585                     decrRefCount(keyobj
); 
1587                     server
.stat_expiredkeys
++; 
1590         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1593     /* Swap a few keys on disk if we are over the memory limit and VM
1594      * is enabled. Try to free objects from the free list first. */
1595     if (vmCanSwapOut()) { 
1596         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1597                 server
.vm_max_memory
) 
1601             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1602             retval 
= (server
.vm_max_threads 
== 0) ? 
1603                         vmSwapOneObjectBlocking() : 
1604                         vmSwapOneObjectThreaded(); 
1605             if (retval 
== REDIS_ERR 
&& !(loops 
% 300) && 
1606                 zmalloc_used_memory() > 
1607                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1609                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1611             /* Note that when using threaded I/O we free just one object, 
1612              * because anyway when the I/O thread in charge to swap this 
1613              * object out will finish, the handler of completed jobs 
1614              * will try to swap more objects if we are still out of memory. */ 
1615             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1619     /* Check if we should connect to a MASTER */ 
1620     if (server
.replstate 
== REDIS_REPL_CONNECT 
&& !(loops 
% 10)) { 
1621         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1622         if (syncWithMaster() == REDIS_OK
) { 
1623             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1624             if (server
.appendonly
) rewriteAppendOnlyFileBackground(); 
1630 /* This function gets called every time Redis is entering the 
1631  * main loop of the event driven library, that is, before to sleep 
1632  * for ready file descriptors. */ 
1633 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1634     REDIS_NOTUSED(eventLoop
); 
1636     /* Awake clients that got all the swapped keys they requested */ 
1637     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1641         listRewind(server
.io_ready_clients
,&li
); 
1642         while((ln 
= listNext(&li
))) { 
1643             redisClient 
*c 
= ln
->value
; 
1644             struct redisCommand 
*cmd
; 
1646             /* Resume the client. */ 
1647             listDelNode(server
.io_ready_clients
,ln
); 
1648             c
->flags 
&= (~REDIS_IO_WAIT
); 
1649             server
.vm_blocked_clients
--; 
1650             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1651                 readQueryFromClient
, c
); 
1652             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1653             assert(cmd 
!= NULL
); 
1656             /* There may be more data to process in the input buffer. */ 
1657             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1658                 processInputBuffer(c
); 
1661     /* Write the AOF buffer on disk */ 
1662     flushAppendOnlyFile(); 
1665 static void createSharedObjects(void) { 
1668     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1669     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1670     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1671     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1672     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1673     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1674     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1675     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1676     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1677     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1678     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1679     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1680         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1681     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1682         "-ERR no such key\r\n")); 
1683     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1684         "-ERR syntax error\r\n")); 
1685     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1686         "-ERR source and destination objects are the same\r\n")); 
1687     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1688         "-ERR index out of range\r\n")); 
1689     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1690     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1691     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1692     shared
.select0 
= createStringObject("select 0\r\n",10); 
1693     shared
.select1 
= createStringObject("select 1\r\n",10); 
1694     shared
.select2 
= createStringObject("select 2\r\n",10); 
1695     shared
.select3 
= createStringObject("select 3\r\n",10); 
1696     shared
.select4 
= createStringObject("select 4\r\n",10); 
1697     shared
.select5 
= createStringObject("select 5\r\n",10); 
1698     shared
.select6 
= createStringObject("select 6\r\n",10); 
1699     shared
.select7 
= createStringObject("select 7\r\n",10); 
1700     shared
.select8 
= createStringObject("select 8\r\n",10); 
1701     shared
.select9 
= createStringObject("select 9\r\n",10); 
1702     shared
.messagebulk 
= createStringObject("$7\r\nmessage\r\n",13); 
1703     shared
.pmessagebulk 
= createStringObject("$8\r\npmessage\r\n",14); 
1704     shared
.subscribebulk 
= createStringObject("$9\r\nsubscribe\r\n",15); 
1705     shared
.unsubscribebulk 
= createStringObject("$11\r\nunsubscribe\r\n",18); 
1706     shared
.psubscribebulk 
= createStringObject("$10\r\npsubscribe\r\n",17); 
1707     shared
.punsubscribebulk 
= createStringObject("$12\r\npunsubscribe\r\n",19); 
1708     shared
.mbulk3 
= createStringObject("*3\r\n",4); 
1709     shared
.mbulk4 
= createStringObject("*4\r\n",4); 
1710     for (j 
= 0; j 
< REDIS_SHARED_INTEGERS
; j
++) { 
1711         shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
); 
1712         shared
.integers
[j
]->encoding 
= REDIS_ENCODING_INT
; 
1716 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1717     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1718     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1719     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1720     server
.saveparamslen
++; 
1723 static void resetServerSaveParams() { 
1724     zfree(server
.saveparams
); 
1725     server
.saveparams 
= NULL
; 
1726     server
.saveparamslen 
= 0; 
1729 static void initServerConfig() { 
1730     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1731     server
.port 
= REDIS_SERVERPORT
; 
1732     server
.verbosity 
= REDIS_VERBOSE
; 
1733     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1734     server
.saveparams 
= NULL
; 
1735     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1736     server
.bindaddr 
= NULL
; 
1737     server
.glueoutputbuf 
= 1; 
1738     server
.daemonize 
= 0; 
1739     server
.appendonly 
= 0; 
1740     server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1741     server
.no_appendfsync_on_rewrite 
= 0; 
1742     server
.lastfsync 
= time(NULL
); 
1743     server
.appendfd 
= -1; 
1744     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1745     server
.pidfile 
= zstrdup("/var/run/redis.pid"); 
1746     server
.dbfilename 
= zstrdup("dump.rdb"); 
1747     server
.appendfilename 
= zstrdup("appendonly.aof"); 
1748     server
.requirepass 
= NULL
; 
1749     server
.rdbcompression 
= 1; 
1750     server
.activerehashing 
= 1; 
1751     server
.maxclients 
= 0; 
1752     server
.blpop_blocked_clients 
= 0; 
1753     server
.maxmemory 
= 0; 
1754     server
.vm_enabled 
= 0; 
1755     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1756     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1757     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1758     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1759     server
.vm_max_threads 
= 4; 
1760     server
.vm_blocked_clients 
= 0; 
1761     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1762     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
1763     server
.list_max_ziplist_entries 
= REDIS_LIST_MAX_ZIPLIST_ENTRIES
; 
1764     server
.list_max_ziplist_value 
= REDIS_LIST_MAX_ZIPLIST_VALUE
; 
1765     server
.shutdown_asap 
= 0; 
1767     resetServerSaveParams(); 
1769     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1770     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1771     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1772     /* Replication related */ 
1774     server
.masterauth 
= NULL
; 
1775     server
.masterhost 
= NULL
; 
1776     server
.masterport 
= 6379; 
1777     server
.master 
= NULL
; 
1778     server
.replstate 
= REDIS_REPL_NONE
; 
1780     /* Double constants initialization */ 
1782     R_PosInf 
= 1.0/R_Zero
; 
1783     R_NegInf 
= -1.0/R_Zero
; 
1784     R_Nan 
= R_Zero
/R_Zero
; 
1787 static void initServer() { 
1790     signal(SIGHUP
, SIG_IGN
); 
1791     signal(SIGPIPE
, SIG_IGN
); 
1792     setupSigSegvAction(); 
1794     server
.devnull 
= fopen("/dev/null","w"); 
1795     if (server
.devnull 
== NULL
) { 
1796         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1799     server
.clients 
= listCreate(); 
1800     server
.slaves 
= listCreate(); 
1801     server
.monitors 
= listCreate(); 
1802     server
.objfreelist 
= listCreate(); 
1803     createSharedObjects(); 
1804     server
.el 
= aeCreateEventLoop(); 
1805     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1806     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1807     if (server
.fd 
== -1) { 
1808         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1811     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1812         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1813         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1814         server
.db
[j
].blocking_keys 
= dictCreate(&keylistDictType
,NULL
); 
1815         server
.db
[j
].watched_keys 
= dictCreate(&keylistDictType
,NULL
); 
1816         if (server
.vm_enabled
) 
1817             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1818         server
.db
[j
].id 
= j
; 
1820     server
.pubsub_channels 
= dictCreate(&keylistDictType
,NULL
); 
1821     server
.pubsub_patterns 
= listCreate(); 
1822     listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
); 
1823     listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
); 
1824     server
.cronloops 
= 0; 
1825     server
.bgsavechildpid 
= -1; 
1826     server
.bgrewritechildpid 
= -1; 
1827     server
.bgrewritebuf 
= sdsempty(); 
1828     server
.aofbuf 
= sdsempty(); 
1829     server
.lastsave 
= time(NULL
); 
1831     server
.stat_numcommands 
= 0; 
1832     server
.stat_numconnections 
= 0; 
1833     server
.stat_expiredkeys 
= 0; 
1834     server
.stat_starttime 
= time(NULL
); 
1835     server
.unixtime 
= time(NULL
); 
1836     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1837     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1838         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1840     if (server
.appendonly
) { 
1841         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1842         if (server
.appendfd 
== -1) { 
1843             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1849     if (server
.vm_enabled
) vmInit(); 
1852 /* Empty the whole database */ 
1853 static long long emptyDb() { 
1855     long long removed 
= 0; 
1857     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1858         removed 
+= dictSize(server
.db
[j
].dict
); 
1859         dictEmpty(server
.db
[j
].dict
); 
1860         dictEmpty(server
.db
[j
].expires
); 
/* Convert a "yes" / "no" string (compared ignoring case) into an integer.
 *
 * Returns 1 for "yes", 0 for "no" and -1 for any other string. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1871 /* I agree, this is a very rudimental way to load a configuration... 
1872    will improve later if the config gets more complex */ 
1873 static void loadServerConfig(char *filename
) { 
1875     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1879     if (filename
[0] == '-' && filename
[1] == '\0') 
1882         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1883             redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
); 
1888     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1894         line 
= sdstrim(line
," \t\r\n"); 
1896         /* Skip comments and blank lines*/ 
1897         if (line
[0] == '#' || line
[0] == '\0') { 
1902         /* Split into arguments */ 
1903         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1904         sdstolower(argv
[0]); 
1906         /* Execute config directives */ 
1907         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1908             server
.maxidletime 
= atoi(argv
[1]); 
1909             if (server
.maxidletime 
< 0) { 
1910                 err 
= "Invalid timeout value"; goto loaderr
; 
1912         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1913             server
.port 
= atoi(argv
[1]); 
1914             if (server
.port 
< 1 || server
.port 
> 65535) { 
1915                 err 
= "Invalid port"; goto loaderr
; 
1917         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1918             server
.bindaddr 
= zstrdup(argv
[1]); 
1919         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1920             int seconds 
= atoi(argv
[1]); 
1921             int changes 
= atoi(argv
[2]); 
1922             if (seconds 
< 1 || changes 
< 0) { 
1923                 err 
= "Invalid save parameters"; goto loaderr
; 
1925             appendServerSaveParams(seconds
,changes
); 
1926         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1927             if (chdir(argv
[1]) == -1) { 
1928                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1929                     argv
[1], strerror(errno
)); 
1932         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1933             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1934             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1935             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1936             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1938                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1941         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1944             server
.logfile 
= zstrdup(argv
[1]); 
1945             if (!strcasecmp(server
.logfile
,"stdout")) { 
1946                 zfree(server
.logfile
); 
1947                 server
.logfile 
= NULL
; 
1949             if (server
.logfile
) { 
1950                 /* Test if we are able to open the file. The server will not 
1951                  * be able to abort just for this problem later... */ 
1952                 logfp 
= fopen(server
.logfile
,"a"); 
1953                 if (logfp 
== NULL
) { 
1954                     err 
= sdscatprintf(sdsempty(), 
1955                         "Can't open the log file: %s", strerror(errno
)); 
1960         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1961             server
.dbnum 
= atoi(argv
[1]); 
1962             if (server
.dbnum 
< 1) { 
1963                 err 
= "Invalid number of databases"; goto loaderr
; 
1965         } else if (!strcasecmp(argv
[0],"include") && argc 
== 2) { 
1966             loadServerConfig(argv
[1]); 
1967         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1968             server
.maxclients 
= atoi(argv
[1]); 
1969         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1970             server
.maxmemory 
= memtoll(argv
[1],NULL
); 
1971         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1972             server
.masterhost 
= sdsnew(argv
[1]); 
1973             server
.masterport 
= atoi(argv
[2]); 
1974             server
.replstate 
= REDIS_REPL_CONNECT
; 
1975         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1976                 server
.masterauth 
= zstrdup(argv
[1]); 
1977         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1978             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1979                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1981         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1982             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1983                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1985         } else if (!strcasecmp(argv
[0],"activerehashing") && argc 
== 2) { 
1986             if ((server
.activerehashing 
= yesnotoi(argv
[1])) == -1) { 
1987                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1989         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1990             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1991                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1993         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1994             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1995                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1997         } else if (!strcasecmp(argv
[0],"appendfilename") && argc 
== 2) { 
1998             zfree(server
.appendfilename
); 
1999             server
.appendfilename 
= zstrdup(argv
[1]); 
2000         } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite") 
2002             if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) { 
2003                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
2005         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
2006             if (!strcasecmp(argv
[1],"no")) { 
2007                 server
.appendfsync 
= APPENDFSYNC_NO
; 
2008             } else if (!strcasecmp(argv
[1],"always")) { 
2009                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
2010             } else if (!strcasecmp(argv
[1],"everysec")) { 
2011                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
2013                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
2016         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
2017             server
.requirepass 
= zstrdup(argv
[1]); 
2018         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
2019             zfree(server
.pidfile
); 
2020             server
.pidfile 
= zstrdup(argv
[1]); 
2021         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
2022             zfree(server
.dbfilename
); 
2023             server
.dbfilename 
= zstrdup(argv
[1]); 
2024         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
2025             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
2026                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
2028         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
2029             zfree(server
.vm_swap_file
); 
2030             server
.vm_swap_file 
= zstrdup(argv
[1]); 
2031         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
2032             server
.vm_max_memory 
= memtoll(argv
[1],NULL
); 
2033         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
2034             server
.vm_page_size 
= memtoll(argv
[1], NULL
); 
2035         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
2036             server
.vm_pages 
= memtoll(argv
[1], NULL
); 
2037         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
2038             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
2039         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
2040             server
.hash_max_zipmap_entries 
= memtoll(argv
[1], NULL
); 
2041         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
2042             server
.hash_max_zipmap_value 
= memtoll(argv
[1], NULL
); 
2043         } else if (!strcasecmp(argv
[0],"list-max-ziplist-entries") && argc 
== 2){ 
2044             server
.list_max_ziplist_entries 
= memtoll(argv
[1], NULL
); 
2045         } else if (!strcasecmp(argv
[0],"list-max-ziplist-value") && argc 
== 2){ 
2046             server
.list_max_ziplist_value 
= memtoll(argv
[1], NULL
); 
2048             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
2050         for (j 
= 0; j 
< argc
; j
++) 
2055     if (fp 
!= stdin
) fclose(fp
); 
2059     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
2060     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
2061     fprintf(stderr
, ">>> '%s'\n", line
); 
2062     fprintf(stderr
, "%s\n", err
); 
2066 static void freeClientArgv(redisClient 
*c
) { 
2069     for (j 
= 0; j 
< c
->argc
; j
++) 
2070         decrRefCount(c
->argv
[j
]); 
2071     for (j 
= 0; j 
< c
->mbargc
; j
++) 
2072         decrRefCount(c
->mbargv
[j
]); 
2077 static void freeClient(redisClient 
*c
) { 
2080     /* Note that if the client we are freeing is blocked into a blocking 
2081      * call, we have to set querybuf to NULL *before* to call 
2082      * unblockClientWaitingData() to avoid processInputBuffer() will get 
2083      * called. Also it is important to remove the file events after 
2084      * this, because this call adds the READABLE event. */ 
2085     sdsfree(c
->querybuf
); 
2087     if (c
->flags 
& REDIS_BLOCKED
) 
2088         unblockClientWaitingData(c
); 
2090     /* UNWATCH all the keys */ 
2092     listRelease(c
->watched_keys
); 
2093     /* Unsubscribe from all the pubsub channels */ 
2094     pubsubUnsubscribeAllChannels(c
,0); 
2095     pubsubUnsubscribeAllPatterns(c
,0); 
2096     dictRelease(c
->pubsub_channels
); 
2097     listRelease(c
->pubsub_patterns
); 
2098     /* Obvious cleanup */ 
2099     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
2100     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2101     listRelease(c
->reply
); 
2104     /* Remove from the list of clients */ 
2105     ln 
= listSearchKey(server
.clients
,c
); 
2106     redisAssert(ln 
!= NULL
); 
2107     listDelNode(server
.clients
,ln
); 
2108     /* Remove from the list of clients that are now ready to be restarted 
2109      * after waiting for swapped keys */ 
2110     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
2111         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
2113             listDelNode(server
.io_ready_clients
,ln
); 
2114             server
.vm_blocked_clients
--; 
2117     /* Remove from the list of clients waiting for swapped keys */ 
2118     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
2119         ln 
= listFirst(c
->io_keys
); 
2120         dontWaitForSwappedKey(c
,ln
->value
); 
2122     listRelease(c
->io_keys
); 
2123     /* Master/slave cleanup */ 
2124     if (c
->flags 
& REDIS_SLAVE
) { 
2125         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
2127         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
2128         ln 
= listSearchKey(l
,c
); 
2129         redisAssert(ln 
!= NULL
); 
2132     if (c
->flags 
& REDIS_MASTER
) { 
2133         server
.master 
= NULL
; 
2134         server
.replstate 
= REDIS_REPL_CONNECT
; 
2136     /* Release memory */ 
2139     freeClientMultiState(c
); 
2143 #define GLUEREPLY_UP_TO (1024) 
2144 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
2146     char buf
[GLUEREPLY_UP_TO
]; 
2151     listRewind(c
->reply
,&li
); 
2152     while((ln 
= listNext(&li
))) { 
2156         objlen 
= sdslen(o
->ptr
); 
2157         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
2158             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
2160             listDelNode(c
->reply
,ln
); 
2162             if (copylen 
== 0) return; 
2166     /* Now the output buffer is empty, add the new single element */ 
2167     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
2168     listAddNodeHead(c
->reply
,o
); 
2171 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2172     redisClient 
*c 
= privdata
; 
2173     int nwritten 
= 0, totwritten 
= 0, objlen
; 
2176     REDIS_NOTUSED(mask
); 
2178     /* Use writev() if we have enough buffers to send */ 
2179     if (!server
.glueoutputbuf 
&& 
2180         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&& 
2181         !(c
->flags 
& REDIS_MASTER
)) 
2183         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
2187     while(listLength(c
->reply
)) { 
2188         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
2189             glueReplyBuffersIfNeeded(c
); 
2191         o 
= listNodeValue(listFirst(c
->reply
)); 
2192         objlen 
= sdslen(o
->ptr
); 
2195             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2199         if (c
->flags 
& REDIS_MASTER
) { 
2200             /* Don't reply to a master */ 
2201             nwritten 
= objlen 
- c
->sentlen
; 
2203             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
2204             if (nwritten 
<= 0) break; 
2206         c
->sentlen 
+= nwritten
; 
2207         totwritten 
+= nwritten
; 
2208         /* If we fully sent the object on head go to the next one */ 
2209         if (c
->sentlen 
== objlen
) { 
2210             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2213         /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT 
2214          * bytes, in a single threaded server it's a good idea to serve 
2215          * other clients as well, even if a very large request comes from 
2216          * super fast link that is always able to accept data (in real world 
2217          * scenario think about 'KEYS *' against the loopback interfae) */ 
2218         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
2220     if (nwritten 
== -1) { 
2221         if (errno 
== EAGAIN
) { 
2224             redisLog(REDIS_VERBOSE
, 
2225                 "Error writing to client: %s", strerror(errno
)); 
2230     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
2231     if (listLength(c
->reply
) == 0) { 
2233         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2237 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
2239     redisClient 
*c 
= privdata
; 
2240     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
2242     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
2243     int offset
, ion 
= 0; 
2245     REDIS_NOTUSED(mask
); 
2248     while (listLength(c
->reply
)) { 
2249         offset 
= c
->sentlen
; 
2253         /* fill-in the iov[] array */ 
2254         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
2255             o 
= listNodeValue(node
); 
2256             objlen 
= sdslen(o
->ptr
); 
2258             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
) 
2261             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
2262                 break; /* no more iovecs */ 
2264             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
2265             iov
[ion
].iov_len 
= objlen 
- offset
; 
2266             willwrite 
+= objlen 
- offset
; 
2267             offset 
= 0; /* just for the first item */ 
2274         /* write all collected blocks at once */ 
2275         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
2276             if (errno 
!= EAGAIN
) { 
2277                 redisLog(REDIS_VERBOSE
, 
2278                          "Error writing to client: %s", strerror(errno
)); 
2285         totwritten 
+= nwritten
; 
2286         offset 
= c
->sentlen
; 
2288         /* remove written robjs from c->reply */ 
2289         while (nwritten 
&& listLength(c
->reply
)) { 
2290             o 
= listNodeValue(listFirst(c
->reply
)); 
2291             objlen 
= sdslen(o
->ptr
); 
2293             if(nwritten 
>= objlen 
- offset
) { 
2294                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
2295                 nwritten 
-= objlen 
- offset
; 
2299                 c
->sentlen 
+= nwritten
; 
2307         c
->lastinteraction 
= time(NULL
); 
2309     if (listLength(c
->reply
) == 0) { 
2311         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2315 static int qsortRedisCommands(const void *r1
, const void *r2
) { 
2317         ((struct redisCommand
*)r1
)->name
, 
2318         ((struct redisCommand
*)r2
)->name
); 
2321 static void sortCommandTable() { 
2322     /* Copy and sort the read-only version of the command table */ 
2323     commandTable 
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
)); 
2324     memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
)); 
2326         sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
), 
2327         sizeof(struct redisCommand
),qsortRedisCommands
); 
2330 static struct redisCommand 
*lookupCommand(char *name
) { 
2331     struct redisCommand tmp 
= {name
,NULL
,0,0,NULL
,0,0,0}; 
2335         sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
), 
2336         sizeof(struct redisCommand
), 
2337         qsortRedisCommands
); 
2340 /* resetClient prepare the client to process the next command */ 
2341 static void resetClient(redisClient 
*c
) { 
2347 /* Call() is the core of Redis execution of a command */ 
2348 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2351     dirty 
= server
.dirty
; 
2353     dirty 
= server
.dirty
-dirty
; 
2355     if (server
.appendonly 
&& dirty
) 
2356         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2357     if ((dirty 
|| cmd
->flags 
& REDIS_CMD_FORCE_REPLICATION
) && 
2358         listLength(server
.slaves
)) 
2359         replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
); 
2360     if (listLength(server
.monitors
)) 
2361         replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
); 
2362     server
.stat_numcommands
++; 
2365 /* If this function gets called we already read a whole 
2366  * command, arguments are in the client argv/argc fields. 
2367  * processCommand() execute the command or prepare the 
2368  * server for a bulk read from the client. 
2370  * If 1 is returned the client is still alive and valid, and 
2371  * other operations can be performed by the caller. Otherwise 
2372  * if 0 is returned the client was destroyed (i.e. after QUIT). */ 
2373 static int processCommand(redisClient 
*c
) { 
2374     struct redisCommand 
*cmd
; 
2376     /* Free some memory if needed (maxmemory setting) */ 
2377     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2379     /* Handle the multi bulk command type. This is an alternative protocol 
2380      * supported by Redis in order to receive commands that are composed of 
2381      * multiple binary-safe "bulk" arguments. The latency of processing is 
2382      * a bit higher but this allows things like multi-sets, so if this 
2383      * protocol is used only for MSET and similar commands this is a big win. */ 
2384     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2385         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2386         if (c
->multibulk 
<= 0) { 
2390             decrRefCount(c
->argv
[c
->argc
-1]); 
2394     } else if (c
->multibulk
) { 
2395         if (c
->bulklen 
== -1) { 
2396             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2397                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2401                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2402                 decrRefCount(c
->argv
[0]); 
2403                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2405                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2410                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2414             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2415             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2419             if (c
->multibulk 
== 0) { 
2423                 /* Here we need to swap the multi-bulk argc/argv with the 
2424                  * normal argc/argv of the client structure. */ 
2426                 c
->argv 
= c
->mbargv
; 
2427                 c
->mbargv 
= auxargv
; 
2430                 c
->argc 
= c
->mbargc
; 
2431                 c
->mbargc 
= auxargc
; 
2433                 /* We need to set bulklen to something different than -1 
2434                  * in order for the code below to process the command without 
2435                  * to try to read the last argument of a bulk command as 
2436                  * a special argument. */ 
2438                 /* continue below and process the command */ 
2445     /* -- end of multi bulk commands processing -- */ 
2447     /* The QUIT command is handled as a special case. Normal command 
2448      * procs are unable to close the client connection safely */ 
2449     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2454     /* Now lookup the command and check ASAP about trivial error conditions 
2455      * such wrong arity, bad command name and so forth. */ 
2456     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2459             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2460                 (char*)c
->argv
[0]->ptr
)); 
2463     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2464                (c
->argc 
< -cmd
->arity
)) { 
2466             sdscatprintf(sdsempty(), 
2467                 "-ERR wrong number of arguments for '%s' command\r\n", 
2471     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2472         /* This is a bulk command, we have to read the last argument yet. */ 
2473         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2475         decrRefCount(c
->argv
[c
->argc
-1]); 
2476         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2478             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2483         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2484         /* It is possible that the bulk read is already in the 
2485          * buffer. Check this condition and handle it accordingly. 
2486          * This is just a fast path, alternative to call processInputBuffer(). 
2487          * It's a good idea since the code is small and this condition 
2488          * happens most of the times. */ 
2489         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2490             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2492             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2494             /* Otherwise return... there is to read the last argument 
2495              * from the socket. */ 
2499     /* Let's try to encode the bulk object to save space. */ 
2500     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2501         c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2503     /* Check if the user is authenticated */ 
2504     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2505         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2510     /* Handle the maxmemory directive */ 
2511     if (server
.maxmemory 
&& (cmd
->flags 
& REDIS_CMD_DENYOOM
) && 
2512         zmalloc_used_memory() > server
.maxmemory
) 
2514         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2519     /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ 
2520     if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0) 
2522         cmd
->proc 
!= subscribeCommand 
&& cmd
->proc 
!= unsubscribeCommand 
&& 
2523         cmd
->proc 
!= psubscribeCommand 
&& cmd
->proc 
!= punsubscribeCommand
) { 
2524         addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n")); 
2529     /* Exec the command */ 
2530     if (c
->flags 
& REDIS_MULTI 
&& 
2531         cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand 
&& 
2532         cmd
->proc 
!= multiCommand 
&& cmd
->proc 
!= watchCommand
) 
2534         queueMultiCommand(c
,cmd
); 
2535         addReply(c
,shared
.queued
); 
2537         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2538             blockClientOnSwappedKeys(c
,cmd
)) return 1; 
2542     /* Prepare the client for the next command */ 
2547 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
) { 
2552     /* We need 1+(ARGS*3) objects since commands are using the new protocol 
2553      * and we need 1 object for the first "*<count>\r\n" multibulk count, then 
2554      * for every additional object we have "$<count>\r\n" + object + "\r\n". */ 
2555     robj 
*static_outv
[REDIS_STATIC_ARGS
*3+1]; 
2558     if (argc 
<= REDIS_STATIC_ARGS
) { 
2561         outv 
= zmalloc(sizeof(robj
*)*(argc
*3+1)); 
2564     lenobj 
= createObject(REDIS_STRING
, 
2565             sdscatprintf(sdsempty(), "*%d\r\n", argc
)); 
2566     lenobj
->refcount 
= 0; 
2567     outv
[outc
++] = lenobj
; 
2568     for (j 
= 0; j 
< argc
; j
++) { 
2569         lenobj 
= createObject(REDIS_STRING
, 
2570             sdscatprintf(sdsempty(),"$%lu\r\n", 
2571                 (unsigned long) stringObjectLen(argv
[j
]))); 
2572         lenobj
->refcount 
= 0; 
2573         outv
[outc
++] = lenobj
; 
2574         outv
[outc
++] = argv
[j
]; 
2575         outv
[outc
++] = shared
.crlf
; 
2578     /* Increment all the refcounts at start and decrement at end in order to 
2579      * be sure to free objects if there is no slave in a replication state 
2580      * able to be feed with commands */ 
2581     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2582     listRewind(slaves
,&li
); 
2583     while((ln 
= listNext(&li
))) { 
2584         redisClient 
*slave 
= ln
->value
; 
2586         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2587         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2589         /* Feed all the other slaves, MONITORs and so on */ 
2590         if (slave
->slaveseldb 
!= dictid
) { 
2594             case 0: selectcmd 
= shared
.select0
; break; 
2595             case 1: selectcmd 
= shared
.select1
; break; 
2596             case 2: selectcmd 
= shared
.select2
; break; 
2597             case 3: selectcmd 
= shared
.select3
; break; 
2598             case 4: selectcmd 
= shared
.select4
; break; 
2599             case 5: selectcmd 
= shared
.select5
; break; 
2600             case 6: selectcmd 
= shared
.select6
; break; 
2601             case 7: selectcmd 
= shared
.select7
; break; 
2602             case 8: selectcmd 
= shared
.select8
; break; 
2603             case 9: selectcmd 
= shared
.select9
; break; 
2605                 selectcmd 
= createObject(REDIS_STRING
, 
2606                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2607                 selectcmd
->refcount 
= 0; 
2610             addReply(slave
,selectcmd
); 
2611             slave
->slaveseldb 
= dictid
; 
2613         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2615     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2616     if (outv 
!= static_outv
) zfree(outv
); 
2619 static sds 
sdscatrepr(sds s
, char *p
, size_t len
) { 
2620     s 
= sdscatlen(s
,"\"",1); 
2625             s 
= sdscatprintf(s
,"\\%c",*p
); 
2627         case '\n': s 
= sdscatlen(s
,"\\n",1); break; 
2628         case '\r': s 
= sdscatlen(s
,"\\r",1); break; 
2629         case '\t': s 
= sdscatlen(s
,"\\t",1); break; 
2630         case '\a': s 
= sdscatlen(s
,"\\a",1); break; 
2631         case '\b': s 
= sdscatlen(s
,"\\b",1); break; 
2634                 s 
= sdscatprintf(s
,"%c",*p
); 
2636                 s 
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
); 
2641     return sdscatlen(s
,"\"",1); 
2644 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
) { 
2648     sds cmdrepr 
= sdsnew("+"); 
2652     gettimeofday(&tv
,NULL
); 
2653     cmdrepr 
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
); 
2654     if (dictid 
!= 0) cmdrepr 
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
); 
2656     for (j 
= 0; j 
< argc
; j
++) { 
2657         if (argv
[j
]->encoding 
== REDIS_ENCODING_INT
) { 
2658             cmdrepr 
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
); 
2660             cmdrepr 
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
, 
2661                         sdslen(argv
[j
]->ptr
)); 
2664             cmdrepr 
= sdscatlen(cmdrepr
," ",1); 
2666     cmdrepr 
= sdscatlen(cmdrepr
,"\r\n",2); 
2667     cmdobj 
= createObject(REDIS_STRING
,cmdrepr
); 
2669     listRewind(monitors
,&li
); 
2670     while((ln 
= listNext(&li
))) { 
2671         redisClient 
*monitor 
= ln
->value
; 
2672         addReply(monitor
,cmdobj
); 
2674     decrRefCount(cmdobj
); 
2677 static void processInputBuffer(redisClient 
*c
) { 
2679     /* Before processing the input buffer, make sure the client is not 
2680      * waiting for a blocking operation such as BLPOP. Note that on the first 
2681      * iteration the client is never blocked, otherwise processInputBuffer 
2682      * would not be called at all, but after the execution of the first commands 
2683      * in the input buffer the client may be blocked, and the "goto again" 
2684      * will try to reiterate. The following line will make it return asap. */ 
2685     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2686     if (c
->bulklen 
== -1) { 
2687         /* Read the first line of the query */ 
2688         char *p 
= strchr(c
->querybuf
,'\n'); 
2695             query 
= c
->querybuf
; 
2696             c
->querybuf 
= sdsempty(); 
2697             querylen 
= 1+(p
-(query
)); 
2698             if (sdslen(query
) > querylen
) { 
2699                 /* leave data after the first line of the query in the buffer */ 
2700                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2702             *p 
= '\0'; /* remove "\n" */ 
2703             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2704             sdsupdatelen(query
); 
2706             /* Now we can split the query in arguments */ 
2707             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2710             if (c
->argv
) zfree(c
->argv
); 
2711             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2713             for (j 
= 0; j 
< argc
; j
++) { 
2714                 if (sdslen(argv
[j
])) { 
2715                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2723                 /* Execute the command. If the client is still valid 
2724                  * after processCommand() return and there is something 
2725                  * on the query buffer try to process the next command. */ 
2726                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2728                 /* Nothing to process, argc == 0. Just process the query 
2729                  * buffer if it's not empty or return to the caller */ 
2730                 if (sdslen(c
->querybuf
)) goto again
; 
2733         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2734             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2739         /* Bulk read handling. Note that if we are at this point 
2740            the client already sent a command terminated with a newline, 
2741            we are reading the bulk data that is actually the last 
2742            argument of the command. */ 
2743         int qbl 
= sdslen(c
->querybuf
); 
2745         if (c
->bulklen 
<= qbl
) { 
2746             /* Copy everything but the final CRLF as final argument */ 
2747             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2749             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2750             /* Process the command. If the client is still valid after 
2751              * the processing and there is more data in the buffer 
2752              * try to parse it. */ 
2753             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2759 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2760     redisClient 
*c 
= (redisClient
*) privdata
; 
2761     char buf
[REDIS_IOBUF_LEN
]; 
2764     REDIS_NOTUSED(mask
); 
2766     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2768         if (errno 
== EAGAIN
) { 
2771             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2775     } else if (nread 
== 0) { 
2776         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2781         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2782         c
->lastinteraction 
= time(NULL
); 
2786     processInputBuffer(c
); 
2789 static int selectDb(redisClient 
*c
, int id
) { 
2790     if (id 
< 0 || id 
>= server
.dbnum
) 
2792     c
->db 
= &server
.db
[id
]; 
2796 static void *dupClientReplyValue(void *o
) { 
2797     incrRefCount((robj
*)o
); 
2801 static int listMatchObjects(void *a
, void *b
) { 
2802     return equalStringObjects(a
,b
); 
2805 static redisClient 
*createClient(int fd
) { 
2806     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2808     anetNonBlock(NULL
,fd
); 
2809     anetTcpNoDelay(NULL
,fd
); 
2810     if (!c
) return NULL
; 
2813     c
->querybuf 
= sdsempty(); 
2822     c
->lastinteraction 
= time(NULL
); 
2823     c
->authenticated 
= 0; 
2824     c
->replstate 
= REDIS_REPL_NONE
; 
2825     c
->reply 
= listCreate(); 
2826     listSetFreeMethod(c
->reply
,decrRefCount
); 
2827     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2828     c
->blocking_keys 
= NULL
; 
2829     c
->blocking_keys_num 
= 0; 
2830     c
->io_keys 
= listCreate(); 
2831     c
->watched_keys 
= listCreate(); 
2832     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2833     c
->pubsub_channels 
= dictCreate(&setDictType
,NULL
); 
2834     c
->pubsub_patterns 
= listCreate(); 
2835     listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
); 
2836     listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
); 
2837     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2838         readQueryFromClient
, c
) == AE_ERR
) { 
2842     listAddNodeTail(server
.clients
,c
); 
2843     initClientMultiState(c
); 
2847 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2848     if (listLength(c
->reply
) == 0 && 
2849         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2850          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2851         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2852         sendReplyToClient
, c
) == AE_ERR
) return; 
2854     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2855         obj 
= dupStringObject(obj
); 
2856         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2858     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2861 static void addReplySds(redisClient 
*c
, sds s
) { 
2862     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2867 static void addReplyDouble(redisClient 
*c
, double d
) { 
2870     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2871     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2872         (unsigned long) strlen(buf
),buf
)); 
2875 static void addReplyLongLong(redisClient 
*c
, long long ll
) { 
2880         addReply(c
,shared
.czero
); 
2882     } else if (ll 
== 1) { 
2883         addReply(c
,shared
.cone
); 
2887     len 
= ll2string(buf
+1,sizeof(buf
)-1,ll
); 
2890     addReplySds(c
,sdsnewlen(buf
,len
+3)); 
2893 static void addReplyUlong(redisClient 
*c
, unsigned long ul
) { 
2898         addReply(c
,shared
.czero
); 
2900     } else if (ul 
== 1) { 
2901         addReply(c
,shared
.cone
); 
2904     len 
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
); 
2905     addReplySds(c
,sdsnewlen(buf
,len
)); 
2908 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2912     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2913         len 
= sdslen(obj
->ptr
); 
2915         long n 
= (long)obj
->ptr
; 
2917         /* Compute how many bytes will take this integer as a radix 10 string */ 
2923         while((n 
= n
/10) != 0) { 
2928     intlen 
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
); 
2929     buf
[intlen
+1] = '\r'; 
2930     buf
[intlen
+2] = '\n'; 
2931     addReplySds(c
,sdsnewlen(buf
,intlen
+3)); 
2934 static void addReplyBulk(redisClient 
*c
, robj 
*obj
) { 
2935     addReplyBulkLen(c
,obj
); 
2937     addReply(c
,shared
.crlf
); 
2940 static void addReplyBulkSds(redisClient 
*c
, sds s
) { 
2941     robj 
*o 
= createStringObject(s
, sdslen(s
)); 
2946 /* In the CONFIG command we need to add vanilla C string as bulk replies */ 
2947 static void addReplyBulkCString(redisClient 
*c
, char *s
) { 
2949         addReply(c
,shared
.nullbulk
); 
2951         robj 
*o 
= createStringObject(s
,strlen(s
)); 
2957 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2962     REDIS_NOTUSED(mask
); 
2963     REDIS_NOTUSED(privdata
); 
2965     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2966     if (cfd 
== AE_ERR
) { 
2967         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2970     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2971     if ((c 
= createClient(cfd
)) == NULL
) { 
2972         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2973         close(cfd
); /* May be already closed, just ingore errors */ 
2976     /* If maxclient directive is set and this is one client more... close the 
2977      * connection. Note that we create the client instead to check before 
2978      * for this condition, since now the socket is already set in nonblocking 
2979      * mode and we can send an error for free using the Kernel I/O */ 
2980     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2981         char *err 
= "-ERR max number of clients reached\r\n"; 
2983         /* That's a best effort error message, don't check write errors */ 
2984         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2985             /* Nothing to do, Just to avoid the warning... */ 
2990     server
.stat_numconnections
++; 
2993 /* ======================= Redis objects implementation ===================== */ 
2995 static robj 
*createObject(int type
, void *ptr
) { 
2998     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2999     if (listLength(server
.objfreelist
)) { 
3000         listNode 
*head 
= listFirst(server
.objfreelist
); 
3001         o 
= listNodeValue(head
); 
3002         listDelNode(server
.objfreelist
,head
); 
3003         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3005         if (server
.vm_enabled
) 
3006             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3007         o 
= zmalloc(sizeof(*o
)); 
3010     o
->encoding 
= REDIS_ENCODING_RAW
; 
3013     if (server
.vm_enabled
) { 
3014         /* Note that this code may run in the context of an I/O thread 
3015          * and accessing server.lruclock in theory is an error 
3016          * (no locks). But in practice this is safe, and even if we read 
3017          * garbage Redis will not fail. */ 
3018         o
->lru 
= server
.lruclock
; 
3019         o
->storage 
= REDIS_VM_MEMORY
; 
3024 static robj 
*createStringObject(char *ptr
, size_t len
) { 
3025     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
3028 static robj 
*createStringObjectFromLongLong(long long value
) { 
3030     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3031         incrRefCount(shared
.integers
[value
]); 
3032         o 
= shared
.integers
[value
]; 
3034         if (value 
>= LONG_MIN 
&& value 
<= LONG_MAX
) { 
3035             o 
= createObject(REDIS_STRING
, NULL
); 
3036             o
->encoding 
= REDIS_ENCODING_INT
; 
3037             o
->ptr 
= (void*)((long)value
); 
3039             o 
= createObject(REDIS_STRING
,sdsfromlonglong(value
)); 
3045 static robj 
*dupStringObject(robj 
*o
) { 
3046     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
3047     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
3050 static robj 
*createListObject(void) { 
3051     list 
*l 
= listCreate(); 
3052     robj 
*o 
= createObject(REDIS_LIST
,l
); 
3053     listSetFreeMethod(l
,decrRefCount
); 
3054     o
->encoding 
= REDIS_ENCODING_LIST
; 
3058 static robj 
*createZiplistObject(void) { 
3059     unsigned char *zl 
= ziplistNew(); 
3060     robj 
*o 
= createObject(REDIS_LIST
,zl
); 
3061     o
->encoding 
= REDIS_ENCODING_ZIPLIST
; 
3065 static robj 
*createSetObject(void) { 
3066     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
3067     return createObject(REDIS_SET
,d
); 
3070 static robj 
*createHashObject(void) { 
3071     /* All the Hashes start as zipmaps. Will be automatically converted 
3072      * into hash tables if there are enough elements or big elements 
3074     unsigned char *zm 
= zipmapNew(); 
3075     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
3076     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
3080 static robj 
*createZsetObject(void) { 
3081     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
3083     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
3084     zs
->zsl 
= zslCreate(); 
3085     return createObject(REDIS_ZSET
,zs
); 
3088 static void freeStringObject(robj 
*o
) { 
3089     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3094 static void freeListObject(robj 
*o
) { 
3095     switch (o
->encoding
) { 
3096     case REDIS_ENCODING_LIST
: 
3097         listRelease((list
*) o
->ptr
); 
3099     case REDIS_ENCODING_ZIPLIST
: 
3103         redisPanic("Unknown list encoding type"); 
3107 static void freeSetObject(robj 
*o
) { 
3108     dictRelease((dict
*) o
->ptr
); 
3111 static void freeZsetObject(robj 
*o
) { 
3114     dictRelease(zs
->dict
); 
3119 static void freeHashObject(robj 
*o
) { 
3120     switch (o
->encoding
) { 
3121     case REDIS_ENCODING_HT
: 
3122         dictRelease((dict
*) o
->ptr
); 
3124     case REDIS_ENCODING_ZIPMAP
: 
3128         redisPanic("Unknown hash encoding type"); 
3133 static void incrRefCount(robj 
*o
) { 
3137 static void decrRefCount(void *obj
) { 
3140     /* Object is a swapped out value, or in the process of being loaded. */ 
3141     if (server
.vm_enabled 
&& 
3142         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
3144         vmpointer 
*vp 
= obj
; 
3145         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
); 
3146         vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
3147         server
.vm_stats_swapped_objects
--; 
3152     if (o
->refcount 
<= 0) redisPanic("decrRefCount against refcount <= 0"); 
3153     /* Object is in memory, or in the process of being swapped out. 
3155      * If the object is being swapped out, abort the operation on 
3156      * decrRefCount even if the refcount does not drop to 0: the object 
3157      * is referenced at least two times, as value of the key AND as 
3158      * job->val in the iojob. So if we don't invalidate the iojob, when it is 
3159      * done but the relevant key was removed in the meantime, the 
3160      * complete jobs handler will not find the key about the job and the 
3161      * assert will fail. */ 
3162     if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
3163         vmCancelThreadedIOJob(o
); 
3164     if (--(o
->refcount
) == 0) { 
3166         case REDIS_STRING
: freeStringObject(o
); break; 
3167         case REDIS_LIST
: freeListObject(o
); break; 
3168         case REDIS_SET
: freeSetObject(o
); break; 
3169         case REDIS_ZSET
: freeZsetObject(o
); break; 
3170         case REDIS_HASH
: freeHashObject(o
); break; 
3171         default: redisPanic("Unknown object type"); break; 
3173         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
3174         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
3175             !listAddNodeHead(server
.objfreelist
,o
)) 
3177         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3181 static int checkType(redisClient 
*c
, robj 
*o
, int type
) { 
3182     if (o
->type 
!= type
) { 
3183         addReply(c
,shared
.wrongtypeerr
); 
3189 /* Check if the nul-terminated string 's' can be represented by a long 
3190  * (that is, is a number that fits into long without any other space or 
3191  * character before or after the digits). 
3193  * If so, the function returns REDIS_OK and *longval is set to the value 
3194  * of the number. Otherwise REDIS_ERR is returned */ 
3195 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
3196     char buf
[32], *endptr
; 
3200     value 
= strtol(s
, &endptr
, 10); 
3201     if (endptr
[0] != '\0') return REDIS_ERR
; 
3202     slen 
= ll2string(buf
,32,value
); 
3204     /* If the number converted back into a string is not identical 
3205      * then it's not possible to encode the string as integer */ 
3206     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
3207     if (longval
) *longval 
= value
; 
3211 /* Try to encode a string object in order to save space */ 
3212 static robj 
*tryObjectEncoding(robj 
*o
) { 
3216     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
3217         return o
; /* Already encoded */ 
3219     /* It's not safe to encode shared objects: shared objects can be shared 
3220      * everywhere in the "object space" of Redis. Encoded objects can only 
3221      * appear as "values" (and not, for instance, as keys) */ 
3222      if (o
->refcount 
> 1) return o
; 
3224     /* Currently we try to encode only strings */ 
3225     redisAssert(o
->type 
== REDIS_STRING
); 
3227     /* Check if we can represent this string as a long integer */ 
3228     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
; 
3230     /* Ok, this object can be encoded */ 
3231     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3233         incrRefCount(shared
.integers
[value
]); 
3234         return shared
.integers
[value
]; 
3236         o
->encoding 
= REDIS_ENCODING_INT
; 
3238         o
->ptr 
= (void*) value
; 
3243 /* Get a decoded version of an encoded object (returned as a new object). 
3244  * If the object is already raw-encoded just increment the ref count. */ 
3245 static robj 
*getDecodedObject(robj 
*o
) { 
3248     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3252     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
3255         ll2string(buf
,32,(long)o
->ptr
); 
3256         dec 
= createStringObject(buf
,strlen(buf
)); 
3259         redisPanic("Unknown encoding type"); 
3263 /* Compare two string objects via strcmp() or alike. 
3264  * Note that the objects may be integer-encoded. In such a case we 
3265  * use ll2string() to get a string representation of the numbers on the stack 
3266  * and compare the strings, it's much faster than calling getDecodedObject(). 
3268  * Important note: if objects are not integer encoded, but binary-safe strings, 
3269  * sdscmp() from sds.c will apply memcmp() so this function can be considered 
3271 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
3272     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
3273     char bufa
[128], bufb
[128], *astr
, *bstr
; 
3276     if (a 
== b
) return 0; 
3277     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
3278         ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
); 
3284     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
3285         ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
); 
3291     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
3294 /* Equal string objects return 1 if the two objects are the same from the 
3295  * point of view of a string comparison, otherwise 0 is returned. Note that 
3296  * this function is faster then checking for (compareStringObject(a,b) == 0) 
3297  * because it can perform some more optimization. */ 
3298 static int equalStringObjects(robj 
*a
, robj 
*b
) { 
3299     if (a
->encoding 
!= REDIS_ENCODING_RAW 
&& b
->encoding 
!= REDIS_ENCODING_RAW
){ 
3300         return a
->ptr 
== b
->ptr
; 
3302         return compareStringObjects(a
,b
) == 0; 
3306 static size_t stringObjectLen(robj 
*o
) { 
3307     redisAssert(o
->type 
== REDIS_STRING
); 
3308     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3309         return sdslen(o
->ptr
); 
3313         return ll2string(buf
,32,(long)o
->ptr
); 
3317 static int getDoubleFromObject(robj 
*o
, double *target
) { 
3324         redisAssert(o
->type 
== REDIS_STRING
); 
3325         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3326             value 
= strtod(o
->ptr
, &eptr
); 
3327             if (eptr
[0] != '\0') return REDIS_ERR
; 
3328         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3329             value 
= (long)o
->ptr
; 
3331             redisPanic("Unknown string encoding"); 
3339 static int getDoubleFromObjectOrReply(redisClient 
*c
, robj 
*o
, double *target
, const char *msg
) { 
3341     if (getDoubleFromObject(o
, &value
) != REDIS_OK
) { 
3343             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3345             addReplySds(c
, sdsnew("-ERR value is not a double\r\n")); 
3354 static int getLongLongFromObject(robj 
*o
, long long *target
) { 
3361         redisAssert(o
->type 
== REDIS_STRING
); 
3362         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3363             value 
= strtoll(o
->ptr
, &eptr
, 10); 
3364             if (eptr
[0] != '\0') return REDIS_ERR
; 
3365         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3366             value 
= (long)o
->ptr
; 
3368             redisPanic("Unknown string encoding"); 
3376 static int getLongLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long long *target
, const char *msg
) { 
3378     if (getLongLongFromObject(o
, &value
) != REDIS_OK
) { 
3380             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3382             addReplySds(c
, sdsnew("-ERR value is not an integer\r\n")); 
3391 static int getLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long *target
, const char *msg
) { 
3394     if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
; 
3395     if (value 
< LONG_MIN 
|| value 
> LONG_MAX
) { 
3397             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3399             addReplySds(c
, sdsnew("-ERR value is out of range\r\n")); 
3408 /* =========================== Keyspace access API ========================== */ 
3410 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
3411     dictEntry 
*de 
= dictFind(db
->dict
,key
->ptr
); 
3413         robj 
*val 
= dictGetEntryVal(de
); 
3415         if (server
.vm_enabled
) { 
3416             if (val
->storage 
== REDIS_VM_MEMORY 
|| 
3417                 val
->storage 
== REDIS_VM_SWAPPING
) 
3419                 /* If we were swapping the object out, cancel the operation */ 
3420                 if (val
->storage 
== REDIS_VM_SWAPPING
) 
3421                     vmCancelThreadedIOJob(val
); 
3422                 /* Update the access time for the aging algorithm. */ 
3423                 val
->lru 
= server
.lruclock
; 
3425                 int notify 
= (val
->storage 
== REDIS_VM_LOADING
); 
3427                 /* Our value was swapped on disk. Bring it at home. */ 
3428                 redisAssert(val
->type 
== REDIS_VMPOINTER
); 
3429                 val 
= vmLoadObject(val
); 
3430                 dictGetEntryVal(de
) = val
; 
3432                 /* Clients blocked by the VM subsystem may be waiting for 
3434                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
3443 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
3444     expireIfNeeded(db
,key
); 
3445     return lookupKey(db
,key
); 
3448 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
3449     deleteIfVolatile(db
,key
); 
3450     touchWatchedKey(db
,key
); 
3451     return lookupKey(db
,key
); 
3454 static robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3455     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
3456     if (!o
) addReply(c
,reply
); 
3460 static robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3461     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
3462     if (!o
) addReply(c
,reply
); 
3466 /* Add the key to the DB. If the key already exists REDIS_ERR is returned, 
3467  * otherwise REDIS_OK is returned, and the caller should increment the 
3468  * refcount of 'val'. */ 
3469 static int dbAdd(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
3470     /* Perform a lookup before adding the key, as we need to copy the 
3472     if (dictFind(db
->dict
, key
->ptr
) != NULL
) { 
3475         sds copy 
= sdsdup(key
->ptr
); 
3476         dictAdd(db
->dict
, copy
, val
); 
3481 /* If the key does not exist, this is just like dbAdd(). Otherwise 
3482  * the value associated to the key is replaced with the new one. 
3484  * On update (key already existed) 0 is returned. Otherwise 1. */ 
3485 static int dbReplace(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
3486     if (dictFind(db
->dict
,key
->ptr
) == NULL
) { 
3487         sds copy 
= sdsdup(key
->ptr
); 
3488         dictAdd(db
->dict
, copy
, val
); 
3491         dictReplace(db
->dict
, key
->ptr
, val
); 
3496 static int dbExists(redisDb 
*db
, robj 
*key
) { 
3497     return dictFind(db
->dict
,key
->ptr
) != NULL
; 
3500 /* Return a random key, in form of a Redis object. 
3501  * If there are no keys, NULL is returned. 
3503  * The function makes sure to return keys not already expired. */ 
3504 static robj 
*dbRandomKey(redisDb 
*db
) { 
3505     struct dictEntry 
*de
; 
3511         de 
= dictGetRandomKey(db
->dict
); 
3512         if (de 
== NULL
) return NULL
; 
3514         key 
= dictGetEntryKey(de
); 
3515         keyobj 
= createStringObject(key
,sdslen(key
)); 
3516         if (dictFind(db
->expires
,key
)) { 
3517             if (expireIfNeeded(db
,keyobj
)) { 
3518                 decrRefCount(keyobj
); 
3519                 continue; /* search for another key. This expired. */ 
3526 /* Delete a key, value, and associated expiration entry if any, from the DB */ 
3527 static int dbDelete(redisDb 
*db
, robj 
*key
) { 
3530     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
->ptr
); 
3531     retval 
= dictDelete(db
->dict
,key
->ptr
); 
3533     return retval 
== DICT_OK
; 
3536 /*============================ RDB saving/loading =========================== */ 
/* Write the single-byte 'type' marker to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type, 1, 1, fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4-byte int32_t in host byte order
 * (the value is narrowed to 32 bits before being written).
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t)t;

    if (fwrite(&seconds, 4, 1, fp) != 1)
        return -1;
    return 0;
}
3549 /* check rdbLoadLen() comments for more info */ 
3550 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
3551     unsigned char buf
[2]; 
3554         /* Save a 6 bit len */ 
3555         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
3556         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3557     } else if (len 
< (1<<14)) { 
3558         /* Save a 14 bit len */ 
3559         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
3561         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
3563         /* Save a 32 bit len */ 
3564         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
3565         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3567         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
3572 /* Encode 'value' as an integer if possible (if integer will fit the 
3573  * supported range). If the function sucessful encoded the integer 
3574  * then the (up to 5 bytes) encoded representation is written in the 
3575  * string pointed by 'enc' and the length is returned. Otherwise 
3577 static int rdbEncodeInteger(long long value
, unsigned char *enc
) { 
3578     /* Finally check if it fits in our ranges */ 
3579     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
3580         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
3581         enc
[1] = value
&0xFF; 
3583     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
3584         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
3585         enc
[1] = value
&0xFF; 
3586         enc
[2] = (value
>>8)&0xFF; 
3588     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
3589         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
3590         enc
[1] = value
&0xFF; 
3591         enc
[2] = (value
>>8)&0xFF; 
3592         enc
[3] = (value
>>16)&0xFF; 
3593         enc
[4] = (value
>>24)&0xFF; 
/* String objects in the form "2391" "-100" without any space and with a
 * range of values that can fit in an 8, 16 or 32 bit signed value can be
 * encoded as integers to save space.
 *
 * 's' is parsed with strtoll(); the parsed number is then printed back
 * and compared byte by byte with the 'len' bytes of the input, so only
 * strings that are the canonical decimal form of the number are accepted.
 *
 * Returns the length of the encoding written in 'enc' (as reported by
 * rdbEncodeInteger()), or 0 if the string can't be encoded this way. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    long long number;
    char *tail;
    char printed[32];

    /* Check if it's possible to parse the whole string as a number. */
    number = strtoll(s, &tail, 10);
    if (*tail != '\0') return 0;

    /* If the number converted back into a string is not identical to the
     * input, the string can't be represented by the integer alone. */
    ll2string(printed, 32, number);
    if (strlen(printed) != len) return 0;
    if (memcmp(printed, s, len) != 0) return 0;

    return rdbEncodeInteger(number, enc);
}
3619 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) { 
3620     size_t comprlen
, outlen
; 
3624     /* We require at least four bytes compression for this to be worth it */ 
3625     if (len 
<= 4) return 0; 
3627     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
3628     comprlen 
= lzf_compress(s
, len
, out
, outlen
); 
3629     if (comprlen 
== 0) { 
3633     /* Data compressed! Let's save it on disk */ 
3634     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
3635     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
3636     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
3637     if (rdbSaveLen(fp
,len
) == -1) goto writeerr
; 
3638     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3647 /* Save a string object as [len][data] on disk. If the object is a string 
3648  * representation of an integer value we try to save it in a special form */ 
3649 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) { 
3652     /* Try integer encoding */ 
3654         unsigned char buf
[5]; 
3655         if ((enclen 
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) { 
3656             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3661     /* Try LZF compression - under 20 bytes it's unable to compress even 
3662      * aaaaaaaaaaaaaaaaaa so skip it */ 
3663     if (server
.rdbcompression 
&& len 
> 20) { 
3666         retval 
= rdbSaveLzfStringObject(fp
,s
,len
); 
3667         if (retval 
== -1) return -1; 
3668         if (retval 
> 0) return 0; 
3669         /* retval == 0 means data can't be compressed, save the old way */ 
3672     /* Store verbatim */ 
3673     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3674     if (len 
&& fwrite(s
,len
,1,fp
) == 0) return -1; 
/* Save a long long value as either an integer-encoded string or a plain
 * string.
 *
 * The compact form produced by rdbEncodeInteger() is used when available.
 * Otherwise the decimal representation is written, prefixed by its length.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveLongLongAsStringObject(FILE *fp, long long value) {
    unsigned char buf[32];
    int nbytes = rdbEncodeInteger(value, buf);

    if (nbytes <= 0) {
        /* Not encodable as integer: fall back to [len][digits]. */
        nbytes = ll2string((char*)buf, 32, value);
        redisAssert(nbytes < 32);
        if (rdbSaveLen(fp, nbytes) == -1) return -1;
    }

    /* In both cases the payload is the first 'nbytes' bytes of buf. */
    if (fwrite(buf, nbytes, 1, fp) == 0) return -1;
    return 0;
}
3694 /* Like rdbSaveStringObjectRaw() but handle encoded objects */ 
3695 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3696     /* Avoid to decode the object, then encode it again, if the 
3697      * object is alrady integer encoded. */ 
3698     if (obj
->encoding 
== REDIS_ENCODING_INT
) { 
3699         return rdbSaveLongLongAsStringObject(fp
,(long)obj
->ptr
); 
3701         redisAssert(obj
->encoding 
== REDIS_ENCODING_RAW
); 
3702         return rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
3706 /* Save a double value. Doubles are saved as strings prefixed by an unsigned 
3707  * 8 bit integer specifing the length of the representation. 
3708  * This 8 bit integer has special values in order to specify the following 
3714 static int rdbSaveDoubleValue(FILE *fp
, double val
) { 
3715     unsigned char buf
[128]; 
3721     } else if (!isfinite(val
)) { 
3723         buf
[0] = (val 
< 0) ? 255 : 254; 
3725 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL) 
3726         /* Check if the float is in a safe range to be casted into a 
3727          * long long. We are assuming that long long is 64 bit here. 
3728          * Also we are assuming that there are no implementations around where 
3729          * double has precision < 52 bit. 
3731          * Under this assumptions we test if a double is inside an interval 
3732          * where casting to long long is safe. Then using two castings we 
3733          * make sure the decimal part is zero. If all this is true we use 
3734          * integer printing function that is much faster. */ 
3735         double min 
= -4503599627370495; /* (2^52)-1 */ 
3736         double max 
= 4503599627370496; /* -(2^52) */ 
3737         if (val 
> min 
&& val 
< max 
&& val 
== ((double)((long long)val
))) 
3738             ll2string((char*)buf
+1,sizeof(buf
),(long long)val
); 
3741             snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
); 
3742         buf
[0] = strlen((char*)buf
+1); 
3745     if (fwrite(buf
,len
,1,fp
) == 0) return -1; 
3749 /* Save a Redis object. */ 
3750 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3751     if (o
->type 
== REDIS_STRING
) { 
3752         /* Save a string value */ 
3753         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3754     } else if (o
->type 
== REDIS_LIST
) { 
3755         /* Save a list value */ 
3756         if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
3758             unsigned char *vstr
; 
3762             if (rdbSaveLen(fp
,ziplistLen(o
->ptr
)) == -1) return -1; 
3763             p 
= ziplistIndex(o
->ptr
,0); 
3764             while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
3766                     if (rdbSaveRawString(fp
,vstr
,vlen
) == -1) 
3769                     if (rdbSaveLongLongAsStringObject(fp
,vlong
) == -1) 
3772                 p 
= ziplistNext(o
->ptr
,p
); 
3774         } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
3775             list 
*list 
= o
->ptr
; 
3779             if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3780             listRewind(list
,&li
); 
3781             while((ln 
= listNext(&li
))) { 
3782                 robj 
*eleobj 
= listNodeValue(ln
); 
3783                 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3786             redisPanic("Unknown list encoding"); 
3788     } else if (o
->type 
== REDIS_SET
) { 
3789         /* Save a set value */ 
3791         dictIterator 
*di 
= dictGetIterator(set
); 
3794         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3795         while((de 
= dictNext(di
)) != NULL
) { 
3796             robj 
*eleobj 
= dictGetEntryKey(de
); 
3798             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3800         dictReleaseIterator(di
); 
3801     } else if (o
->type 
== REDIS_ZSET
) { 
3802         /* Save a sorted set value */ 
3804         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3807         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3808         while((de 
= dictNext(di
)) != NULL
) { 
3809             robj 
*eleobj 
= dictGetEntryKey(de
); 
3810             double *score 
= dictGetEntryVal(de
); 
3812             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3813             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3815         dictReleaseIterator(di
); 
3816     } else if (o
->type 
== REDIS_HASH
) { 
3817         /* Save a hash value */ 
3818         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3819             unsigned char *p 
= zipmapRewind(o
->ptr
); 
3820             unsigned int count 
= zipmapLen(o
->ptr
); 
3821             unsigned char *key
, *val
; 
3822             unsigned int klen
, vlen
; 
3824             if (rdbSaveLen(fp
,count
) == -1) return -1; 
3825             while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
3826                 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1; 
3827                 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1; 
3830             dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
3833             if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1; 
3834             while((de 
= dictNext(di
)) != NULL
) { 
3835                 robj 
*key 
= dictGetEntryKey(de
); 
3836                 robj 
*val 
= dictGetEntryVal(de
); 
3838                 if (rdbSaveStringObject(fp
,key
) == -1) return -1; 
3839                 if (rdbSaveStringObject(fp
,val
) == -1) return -1; 
3841             dictReleaseIterator(di
); 
3844         redisPanic("Unknown object type"); 
3849 /* Return the length the object will have on disk if saved with 
3850  * the rdbSaveObject() function. Currently we use a trick to get 
3851  * this length with very little changes to the code. In the future 
3852  * we could switch to a faster solution. */ 
3853 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3854     if (fp 
== NULL
) fp 
= server
.devnull
; 
3856     assert(rdbSaveObject(fp
,o
) != 1); 
3860 /* Return the number of pages required to save this object in the swap file */ 
3861 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3862     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3864     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3867 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3868 static int rdbSave(char *filename
) { 
3869     dictIterator 
*di 
= NULL
; 
3874     time_t now 
= time(NULL
); 
3876     /* Wait for I/O threads to terminate, just in case this is a 
3877      * foreground-saving, to avoid seeking the swap file descriptor at the 
3879     if (server
.vm_enabled
) 
3880         waitEmptyIOJobsQueue(); 
3882     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3883     fp 
= fopen(tmpfile
,"w"); 
3885         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3888     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3889     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3890         redisDb 
*db 
= server
.db
+j
; 
3892         if (dictSize(d
) == 0) continue; 
3893         di 
= dictGetIterator(d
); 
3899         /* Write the SELECT DB opcode */ 
3900         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3901         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3903         /* Iterate this DB writing every entry */ 
3904         while((de 
= dictNext(di
)) != NULL
) { 
3905             sds keystr 
= dictGetEntryKey(de
); 
3906             robj key
, *o 
= dictGetEntryVal(de
); 
3909             initStaticStringObject(key
,keystr
); 
3910             expiretime 
= getExpire(db
,&key
); 
3912             /* Save the expire time */ 
3913             if (expiretime 
!= -1) { 
3914                 /* If this key is already expired skip it */ 
3915                 if (expiretime 
< now
) continue; 
3916                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3917                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3919             /* Save the key and associated value. This requires special 
3920              * handling if the value is swapped out. */ 
3921             if (!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY 
|| 
3922                                       o
->storage 
== REDIS_VM_SWAPPING
) { 
3923                 /* Save type, key, value */ 
3924                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3925                 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
; 
3926                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3928                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3930                 /* Get a preview of the object in memory */ 
3931                 po 
= vmPreviewObject(o
); 
3932                 /* Save type, key, value */ 
3933                 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
; 
3934                 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
; 
3935                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3936                 /* Remove the loaded object from memory */ 
3940         dictReleaseIterator(di
); 
3943     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3945     /* Make sure data will not remain on the OS's output buffers */ 
3950     /* Use RENAME to make sure the DB file is changed atomically only 
3951      * if the generated DB file is ok. */ 
3952     if (rename(tmpfile
,filename
) == -1) { 
3953         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3957     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3959     server
.lastsave 
= time(NULL
); 
3965     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3966     if (di
) dictReleaseIterator(di
); 
3970 static int rdbSaveBackground(char *filename
) { 
3973     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3974     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3975     if ((childpid 
= fork()) == 0) { 
3977         if (server
.vm_enabled
) vmReopenSwapFile(); 
3979         if (rdbSave(filename
) == REDIS_OK
) { 
3986         if (childpid 
== -1) { 
3987             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3991         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3992         server
.bgsavechildpid 
= childpid
; 
3993         updateDictResizePolicy(); 
3996     return REDIS_OK
; /* unreached */ 
3999 static void rdbRemoveTempFile(pid_t childpid
) { 
4002     snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
); 
/* Read a one-byte type marker from 'fp'.
 * Returns the byte as a non-negative int, or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char marker;

    if (fread(&marker, 1, 1, fp) != 1)
        return -1;
    return marker;
}
/* Read a time stored on 'fp' as a 4-byte int32_t in host byte order.
 * Returns the value widened to time_t, or -1 if the read fails. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;

    if (fread(&seconds, 4, 1, fp) != 1)
        return -1;
    return (time_t)seconds;
}
4018 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
4019  * of this file for a description of how these are stored on disk. 
4021  * isencoded is set to 1 if the length read is not actually a length but 
4022  * an "encoding type", check the above comments for more info */ 
4023 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
4024     unsigned char buf
[2]; 
4028     if (isencoded
) *isencoded 
= 0; 
4029     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4030     type 
= (buf
[0]&0xC0)>>6; 
4031     if (type 
== REDIS_RDB_6BITLEN
) { 
4032         /* Read a 6 bit len */ 
4034     } else if (type 
== REDIS_RDB_ENCVAL
) { 
4035         /* Read a 6 bit len encoding type */ 
4036         if (isencoded
) *isencoded 
= 1; 
4038     } else if (type 
== REDIS_RDB_14BITLEN
) { 
4039         /* Read a 14 bit len */ 
4040         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4041         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
4043         /* Read a 32 bit len */ 
4044         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4049 /* Load an integer-encoded object from file 'fp', with the specified 
4050  * encoding type 'enctype'. If encode is true the function may return 
4051  * an integer-encoded object as reply, otherwise the returned object 
4052  * will always be encoded as a raw string. */ 
4053 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) { 
4054     unsigned char enc
[4]; 
4057     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
4058         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
4059         val 
= (signed char)enc
[0]; 
4060     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
4062         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
4063         v 
= enc
[0]|(enc
[1]<<8); 
4065     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
4067         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
4068         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
4071         val 
= 0; /* anti-warning */ 
4072         redisPanic("Unknown RDB integer encoding type"); 
4075         return createStringObjectFromLongLong(val
); 
4077         return createObject(REDIS_STRING
,sdsfromlonglong(val
)); 
4080 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
4081     unsigned int len
, clen
; 
4082     unsigned char *c 
= NULL
; 
4085     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4086     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4087     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
4088     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
4089     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
4090     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
4092     return createObject(REDIS_STRING
,val
); 
4099 static robj 
*rdbGenericLoadStringObject(FILE*fp
, int encode
) { 
4104     len 
= rdbLoadLen(fp
,&isencoded
); 
4107         case REDIS_RDB_ENC_INT8
: 
4108         case REDIS_RDB_ENC_INT16
: 
4109         case REDIS_RDB_ENC_INT32
: 
4110             return rdbLoadIntegerObject(fp
,len
,encode
); 
4111         case REDIS_RDB_ENC_LZF
: 
4112             return rdbLoadLzfStringObject(fp
); 
4114             redisPanic("Unknown RDB encoding type"); 
4118     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
4119     val 
= sdsnewlen(NULL
,len
); 
4120     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
4124     return createObject(REDIS_STRING
,val
); 
4127 static robj 
*rdbLoadStringObject(FILE *fp
) { 
4128     return rdbGenericLoadStringObject(fp
,0); 
4131 static robj 
*rdbLoadEncodedStringObject(FILE *fp
) { 
4132     return rdbGenericLoadStringObject(fp
,1); 
4135 /* For information about double serialization check rdbSaveDoubleValue() */ 
4136 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
4140     if (fread(&len
,1,1,fp
) == 0) return -1; 
4142     case 255: *val 
= R_NegInf
; return 0; 
4143     case 254: *val 
= R_PosInf
; return 0; 
4144     case 253: *val 
= R_Nan
; return 0; 
4146         if (fread(buf
,len
,1,fp
) == 0) return -1; 
4148         sscanf(buf
, "%lg", val
); 
4153 /* Load a Redis object of the specified type from the specified file. 
4154  * On success a newly allocated object is returned, otherwise NULL. */ 
4155 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
4156     robj 
*o
, *ele
, *dec
; 
4159     redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
)); 
4160     if (type 
== REDIS_STRING
) { 
4161         /* Read string value */ 
4162         if ((o 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4163         o 
= tryObjectEncoding(o
); 
4164     } else if (type 
== REDIS_LIST
) { 
4165         /* Read list value */ 
4166         if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4168         /* Use a real list when there are too many entries */ 
4169         if (len 
> server
.list_max_ziplist_entries
) { 
4170             o 
= createListObject(); 
4172             o 
= createZiplistObject(); 
4175         /* Load every single element of the list */ 
4177             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4179             /* If we are using a ziplist and the value is too big, convert 
4180              * the object to a real list. */ 
4181             if (o
->encoding 
== REDIS_ENCODING_ZIPLIST 
&& 
4182                 ele
->encoding 
== REDIS_ENCODING_RAW 
&& 
4183                 sdslen(ele
->ptr
) > server
.list_max_ziplist_value
) 
4184                     listTypeConvert(o
,REDIS_ENCODING_LIST
); 
4186             if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4187                 dec 
= getDecodedObject(ele
); 
4188                 o
->ptr 
= ziplistPush(o
->ptr
,dec
->ptr
,sdslen(dec
->ptr
),REDIS_TAIL
); 
4192                 ele 
= tryObjectEncoding(ele
); 
4193                 listAddNodeTail(o
->ptr
,ele
); 
4196     } else if (type 
== REDIS_SET
) { 
4197         /* Read list/set value */ 
4198         if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4199         o 
= createSetObject(); 
4200         /* It's faster to expand the dict to the right size asap in order 
4201          * to avoid rehashing */ 
4202         if (len 
> DICT_HT_INITIAL_SIZE
) 
4203             dictExpand(o
->ptr
,len
); 
4204         /* Load every single element of the list/set */ 
4206             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4207             ele 
= tryObjectEncoding(ele
); 
4208             dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
4210     } else if (type 
== REDIS_ZSET
) { 
4211         /* Read sorted set value */ 
4215         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4216         o 
= createZsetObject(); 
4218         /* Load every single element of the list/set */ 
4221             double *score 
= zmalloc(sizeof(double)); 
4223             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4224             ele 
= tryObjectEncoding(ele
); 
4225             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
4226             dictAdd(zs
->dict
,ele
,score
); 
4227             zslInsert(zs
->zsl
,*score
,ele
); 
4228             incrRefCount(ele
); /* added to skiplist */ 
4230     } else if (type 
== REDIS_HASH
) { 
4233         if ((hashlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4234         o 
= createHashObject(); 
4235         /* Too many entries? Use an hash table. */ 
4236         if (hashlen 
> server
.hash_max_zipmap_entries
) 
4237             convertToRealHash(o
); 
4238         /* Load every key/value, then set it into the zipmap or hash 
4239          * table, as needed. */ 
4243             if ((key 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
4244             if ((val 
= rdbLoadStringObject(fp
)) == NULL
) return NULL
; 
4245             /* If we are using a zipmap and there are too big values 
4246              * the object is converted to real hash table encoding. */ 
4247             if (o
->encoding 
!= REDIS_ENCODING_HT 
&& 
4248                (sdslen(key
->ptr
) > server
.hash_max_zipmap_value 
|| 
4249                 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)) 
4251                     convertToRealHash(o
); 
4254             if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
4255                 unsigned char *zm 
= o
->ptr
; 
4257                 zm 
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
), 
4258                                   val
->ptr
,sdslen(val
->ptr
),NULL
); 
4263                 key 
= tryObjectEncoding(key
); 
4264                 val 
= tryObjectEncoding(val
); 
4265                 dictAdd((dict
*)o
->ptr
,key
,val
); 
4269         redisPanic("Unknown object type"); 
4274 static int rdbLoad(char *filename
) { 
4277     int type
, retval
, rdbver
; 
4278     int swap_all_values 
= 0; 
4279     redisDb 
*db 
= server
.db
+0; 
4281     time_t expiretime
, now 
= time(NULL
); 
4283     fp 
= fopen(filename
,"r"); 
4284     if (!fp
) return REDIS_ERR
; 
4285     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
4287     if (memcmp(buf
,"REDIS",5) != 0) { 
4289         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
4292     rdbver 
= atoi(buf
+5); 
4295         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
4304         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4305         if (type 
== REDIS_EXPIRETIME
) { 
4306             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
4307             /* We read the time so we need to read the object type again */ 
4308             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4310         if (type 
== REDIS_EOF
) break; 
4311         /* Handle SELECT DB opcode as a special case */ 
4312         if (type 
== REDIS_SELECTDB
) { 
4313             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
4315             if (dbid 
>= (unsigned)server
.dbnum
) { 
4316                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
4319             db 
= server
.db
+dbid
; 
4323         if ((key 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
4325         if ((val 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
4326         /* Check if the key already expired */ 
4327         if (expiretime 
!= -1 && expiretime 
< now
) { 
4332         /* Add the new object in the hash table */ 
4333         retval 
= dbAdd(db
,key
,val
); 
4334         if (retval 
== REDIS_ERR
) { 
4335             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
); 
4338         /* Set the expire time if needed */ 
4339         if (expiretime 
!= -1) setExpire(db
,key
,expiretime
); 
4341         /* Handle swapping while loading big datasets when VM is on */ 
4343         /* If we detect we are hopeless about fitting something in memory 
4344          * we just swap every new key on disk. Directly... 
4345          * Note that's important to check for this condition before resorting 
4346          * to random sampling, otherwise we may try to swap already 
4348         if (swap_all_values
) { 
4349             dictEntry 
*de 
= dictFind(db
->dict
,key
->ptr
); 
4351             /* de may be NULL since the key already expired */ 
4354                 val 
= dictGetEntryVal(de
); 
4356                 if (val
->refcount 
== 1 && 
4357                     (vp 
= vmSwapObjectBlocking(val
)) != NULL
) 
4358                     dictGetEntryVal(de
) = vp
; 
4365         /* Flush data on disk once 32 MB of additional RAM are used... */ 
4367         if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32) 
4370         /* If we have still some hope of having some value fitting memory 
4371          * then we try random sampling. */ 
4372         if (!swap_all_values 
&& server
.vm_enabled 
&& force_swapout
) { 
4373             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
4374                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
4376             if (zmalloc_used_memory() > server
.vm_max_memory
) 
4377                 swap_all_values 
= 1; /* We are already using too much mem */ 
4383 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
4384     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
4386     return REDIS_ERR
; /* Just to avoid warning */ 
4389 /*================================== Shutdown =============================== */ 
4390 static int prepareForShutdown() { 
4391     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4392     /* Kill the saving child if there is a background saving in progress. 
4393        We want to avoid race conditions, for instance our saving child may 
4394        overwrite the synchronous saving did by SHUTDOWN. */ 
4395     if (server
.bgsavechildpid 
!= -1) { 
4396         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4397         kill(server
.bgsavechildpid
,SIGKILL
); 
4398         rdbRemoveTempFile(server
.bgsavechildpid
); 
4400     if (server
.appendonly
) { 
4401         /* Append only file: fsync() the AOF and exit */ 
4402         aof_fsync(server
.appendfd
); 
4403         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4405         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4406         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4407             if (server
.daemonize
) 
4408                 unlink(server
.pidfile
); 
4409             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4411             /* Ooops.. error saving! The best we can do is to continue 
4412              * operating. Note that if there was a background saving process, 
4413              * in the next cron() Redis will be notified that the background 
4414              * saving aborted, handling special stuff like slaves pending for 
4415              * synchronization... */ 
4416             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit"); 
4420     redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4424 /*================================== Commands =============================== */ 
4426 static void authCommand(redisClient 
*c
) { 
4427     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
4428       c
->authenticated 
= 1; 
4429       addReply(c
,shared
.ok
); 
4431       c
->authenticated 
= 0; 
4432       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
4436 static void pingCommand(redisClient 
*c
) { 
4437     addReply(c
,shared
.pong
); 
4440 static void echoCommand(redisClient 
*c
) { 
4441     addReplyBulk(c
,c
->argv
[1]); 
4444 /*=================================== Strings =============================== */ 
4446 static void setGenericCommand(redisClient 
*c
, int nx
, robj 
*key
, robj 
*val
, robj 
*expire
) { 
4448     long seconds 
= 0; /* initialized to avoid an harmness warning */ 
4451         if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
) 
4454             addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n")); 
4459     touchWatchedKey(c
->db
,key
); 
4460     if (nx
) deleteIfVolatile(c
->db
,key
); 
4461     retval 
= dbAdd(c
->db
,key
,val
); 
4462     if (retval 
== REDIS_ERR
) { 
4464             dbReplace(c
->db
,key
,val
); 
4467             addReply(c
,shared
.czero
); 
4474     removeExpire(c
->db
,key
); 
4475     if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
); 
4476     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4479 static void setCommand(redisClient 
*c
) { 
4480     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
); 
4483 static void setnxCommand(redisClient 
*c
) { 
4484     setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
); 
4487 static void setexCommand(redisClient 
*c
) { 
4488     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]); 
4491 static int getGenericCommand(redisClient 
*c
) { 
4494     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
) 
4497     if (o
->type 
!= REDIS_STRING
) { 
4498         addReply(c
,shared
.wrongtypeerr
); 
4506 static void getCommand(redisClient 
*c
) { 
4507     getGenericCommand(c
); 
4510 static void getsetCommand(redisClient 
*c
) { 
4511     if (getGenericCommand(c
) == REDIS_ERR
) return; 
4512     dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]); 
4513     incrRefCount(c
->argv
[2]); 
4515     removeExpire(c
->db
,c
->argv
[1]); 
4518 static void mgetCommand(redisClient 
*c
) { 
4521     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
4522     for (j 
= 1; j 
< c
->argc
; j
++) { 
4523         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
4525             addReply(c
,shared
.nullbulk
); 
4527             if (o
->type 
!= REDIS_STRING
) { 
4528                 addReply(c
,shared
.nullbulk
); 
4536 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
4537     int j
, busykeys 
= 0; 
4539     if ((c
->argc 
% 2) == 0) { 
4540         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
4543     /* Handle the NX flag. The MSETNX semantic is to return zero and
4544      * set nothing at all if at least one key already exists. */
4546         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4547             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
4553         addReply(c
, shared
.czero
); 
4557     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4558         c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]); 
4559         dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]); 
4560         incrRefCount(c
->argv
[j
+1]); 
4561         removeExpire(c
->db
,c
->argv
[j
]); 
4563     server
.dirty 
+= (c
->argc
-1)/2; 
4564     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4567 static void msetCommand(redisClient 
*c
) { 
4568     msetGenericCommand(c
,0); 
4571 static void msetnxCommand(redisClient 
*c
) { 
4572     msetGenericCommand(c
,1); 
4575 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
4579     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4580     if (o 
!= NULL 
&& checkType(c
,o
,REDIS_STRING
)) return; 
4581     if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return; 
4584     o 
= createStringObjectFromLongLong(value
); 
4585     dbReplace(c
->db
,c
->argv
[1],o
); 
4587     addReply(c
,shared
.colon
); 
4589     addReply(c
,shared
.crlf
); 
4592 static void incrCommand(redisClient 
*c
) { 
4593     incrDecrCommand(c
,1); 
4596 static void decrCommand(redisClient 
*c
) { 
4597     incrDecrCommand(c
,-1); 
4600 static void incrbyCommand(redisClient 
*c
) { 
4603     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4604     incrDecrCommand(c
,incr
); 
4607 static void decrbyCommand(redisClient 
*c
) { 
4610     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4611     incrDecrCommand(c
,-incr
); 
4614 static void appendCommand(redisClient 
*c
) { 
4619     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4621         /* Create the key */ 
4622         retval 
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]); 
4623         incrRefCount(c
->argv
[2]); 
4624         totlen 
= stringObjectLen(c
->argv
[2]); 
4626         if (o
->type 
!= REDIS_STRING
) { 
4627             addReply(c
,shared
.wrongtypeerr
); 
4630         /* If the object is specially encoded or shared we have to make 
4632         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
4633             robj 
*decoded 
= getDecodedObject(o
); 
4635             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
4636             decrRefCount(decoded
); 
4637             dbReplace(c
->db
,c
->argv
[1],o
); 
4640         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
4641             o
->ptr 
= sdscatlen(o
->ptr
, 
4642                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
4644             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
4645                 (unsigned long) c
->argv
[2]->ptr
); 
4647         totlen 
= sdslen(o
->ptr
); 
4650     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
4653 static void substrCommand(redisClient 
*c
) { 
4655     long start 
= atoi(c
->argv
[2]->ptr
); 
4656     long end 
= atoi(c
->argv
[3]->ptr
); 
4657     size_t rangelen
, strlen
; 
4660     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4661         checkType(c
,o
,REDIS_STRING
)) return; 
4663     o 
= getDecodedObject(o
); 
4664     strlen 
= sdslen(o
->ptr
); 
4666     /* convert negative indexes */ 
4667     if (start 
< 0) start 
= strlen
+start
; 
4668     if (end 
< 0) end 
= strlen
+end
; 
4669     if (start 
< 0) start 
= 0; 
4670     if (end 
< 0) end 
= 0; 
4672     /* indexes sanity checks */ 
4673     if (start 
> end 
|| (size_t)start 
>= strlen
) { 
4674         /* Out of range start or start > end result in null reply */ 
4675         addReply(c
,shared
.nullbulk
); 
4679     if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
4680     rangelen 
= (end
-start
)+1; 
4682     /* Return the result */ 
4683     addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
)); 
4684     range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
4685     addReplySds(c
,range
); 
4686     addReply(c
,shared
.crlf
); 
4690 /* ========================= Type agnostic commands ========================= */ 
4692 static void delCommand(redisClient 
*c
) { 
4695     for (j 
= 1; j 
< c
->argc
; j
++) { 
4696         if (dbDelete(c
->db
,c
->argv
[j
])) { 
4697             touchWatchedKey(c
->db
,c
->argv
[j
]); 
4702     addReplyLongLong(c
,deleted
); 
4705 static void existsCommand(redisClient 
*c
) { 
4706     expireIfNeeded(c
->db
,c
->argv
[1]); 
4707     if (dbExists(c
->db
,c
->argv
[1])) { 
4708         addReply(c
, shared
.cone
); 
4710         addReply(c
, shared
.czero
); 
4714 static void selectCommand(redisClient 
*c
) { 
4715     int id 
= atoi(c
->argv
[1]->ptr
); 
4717     if (selectDb(c
,id
) == REDIS_ERR
) { 
4718         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
4720         addReply(c
,shared
.ok
); 
4724 static void randomkeyCommand(redisClient 
*c
) { 
4727     if ((key 
= dbRandomKey(c
->db
)) == NULL
) { 
4728         addReply(c
,shared
.nullbulk
); 
4732     addReplyBulk(c
,key
); 
4736 static void keysCommand(redisClient 
*c
) { 
4739     sds pattern 
= c
->argv
[1]->ptr
; 
4740     int plen 
= sdslen(pattern
); 
4741     unsigned long numkeys 
= 0; 
4742     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
4744     di 
= dictGetIterator(c
->db
->dict
); 
4746     decrRefCount(lenobj
); 
4747     while((de 
= dictNext(di
)) != NULL
) { 
4748         sds key 
= dictGetEntryKey(de
); 
4751         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
4752             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
4753             keyobj 
= createStringObject(key
,sdslen(key
)); 
4754             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
4755                 addReplyBulk(c
,keyobj
); 
4758             decrRefCount(keyobj
); 
4761     dictReleaseIterator(di
); 
4762     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
4765 static void dbsizeCommand(redisClient 
*c
) { 
4767         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
4770 static void lastsaveCommand(redisClient 
*c
) { 
4772         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
4775 static void typeCommand(redisClient 
*c
) { 
4779     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4784         case REDIS_STRING
: type 
= "+string"; break; 
4785         case REDIS_LIST
: type 
= "+list"; break; 
4786         case REDIS_SET
: type 
= "+set"; break; 
4787         case REDIS_ZSET
: type 
= "+zset"; break; 
4788         case REDIS_HASH
: type 
= "+hash"; break; 
4789         default: type 
= "+unknown"; break; 
4792     addReplySds(c
,sdsnew(type
)); 
4793     addReply(c
,shared
.crlf
); 
4796 static void saveCommand(redisClient 
*c
) { 
4797     if (server
.bgsavechildpid 
!= -1) { 
4798         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
4801     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4802         addReply(c
,shared
.ok
); 
4804         addReply(c
,shared
.err
); 
4808 static void bgsaveCommand(redisClient 
*c
) { 
4809     if (server
.bgsavechildpid 
!= -1) { 
4810         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4813     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4814         char *status 
= "+Background saving started\r\n"; 
4815         addReplySds(c
,sdsnew(status
)); 
4817         addReply(c
,shared
.err
); 
4821 static void shutdownCommand(redisClient 
*c
) { 
4822     if (prepareForShutdown() == REDIS_OK
) 
4824     addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n")); 
4827 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4830     /* To use the same key as src and dst is probably an error */ 
4831     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4832         addReply(c
,shared
.sameobjecterr
); 
4836     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
4840     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4841     if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) { 
4844             addReply(c
,shared
.czero
); 
4847         dbReplace(c
->db
,c
->argv
[2],o
); 
4849     dbDelete(c
->db
,c
->argv
[1]); 
4850     touchWatchedKey(c
->db
,c
->argv
[2]); 
4852     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
4855 static void renameCommand(redisClient 
*c
) { 
4856     renameGenericCommand(c
,0); 
4859 static void renamenxCommand(redisClient 
*c
) { 
4860     renameGenericCommand(c
,1); 
4863 static void moveCommand(redisClient 
*c
) { 
4868     /* Obtain source and target DB pointers */ 
4871     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4872         addReply(c
,shared
.outofrangeerr
); 
4876     selectDb(c
,srcid
); /* Back to the source DB */ 
4878     /* If the user is moving using as target the same 
4879      * DB as the source DB it is probably an error. */ 
4881         addReply(c
,shared
.sameobjecterr
); 
4885     /* Check if the element exists and get a reference */ 
4886     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4888         addReply(c
,shared
.czero
); 
4892     /* Try to add the element to the target DB */ 
4893     deleteIfVolatile(dst
,c
->argv
[1]); 
4894     if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) { 
4895         addReply(c
,shared
.czero
); 
4900     /* OK! key moved, free the entry in the source DB */ 
4901     dbDelete(src
,c
->argv
[1]); 
4903     addReply(c
,shared
.cone
); 
4906 /* =================================== Lists ================================ */ 
4909 /* Check the argument length to see if it requires us to convert the ziplist 
4910  * to a real list. Only check raw-encoded objects because integer encoded 
4911  * objects are never too long. */ 
4912 static void listTypeTryConversion(robj 
*subject
, robj 
*value
) { 
4913     if (subject
->encoding 
!= REDIS_ENCODING_ZIPLIST
) return; 
4914     if (value
->encoding 
== REDIS_ENCODING_RAW 
&& 
4915         sdslen(value
->ptr
) > server
.list_max_ziplist_value
) 
4916             listTypeConvert(subject
,REDIS_ENCODING_LIST
); 
4919 static void listTypePush(robj 
*subject
, robj 
*value
, int where
) { 
4920     /* Check if we need to convert the ziplist */ 
4921     listTypeTryConversion(subject
,value
); 
4922     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST 
&& 
4923         ziplistLen(subject
->ptr
) > server
.list_max_ziplist_entries
) 
4924             listTypeConvert(subject
,REDIS_ENCODING_LIST
); 
4926     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4927         int pos 
= (where 
== REDIS_HEAD
) ? ZIPLIST_HEAD 
: ZIPLIST_TAIL
; 
4928         value 
= getDecodedObject(value
); 
4929         subject
->ptr 
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),pos
); 
4930         decrRefCount(value
); 
4931     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4932         if (where 
== REDIS_HEAD
) { 
4933             listAddNodeHead(subject
->ptr
,value
); 
4935             listAddNodeTail(subject
->ptr
,value
); 
4937         incrRefCount(value
); 
4939         redisPanic("Unknown list encoding"); 
4943 static robj 
*listTypePop(robj 
*subject
, int where
) { 
4945     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4947         unsigned char *vstr
; 
4950         int pos 
= (where 
== REDIS_HEAD
) ? 0 : -1; 
4951         p 
= ziplistIndex(subject
->ptr
,pos
); 
4952         if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
4954                 value 
= createStringObject((char*)vstr
,vlen
); 
4956                 value 
= createStringObjectFromLongLong(vlong
); 
4958             /* We only need to delete an element when it exists */ 
4959             subject
->ptr 
= ziplistDelete(subject
->ptr
,&p
); 
4961     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4962         list 
*list 
= subject
->ptr
; 
4964         if (where 
== REDIS_HEAD
) { 
4965             ln 
= listFirst(list
); 
4967             ln 
= listLast(list
); 
4970             value 
= listNodeValue(ln
); 
4971             incrRefCount(value
); 
4972             listDelNode(list
,ln
); 
4975         redisPanic("Unknown list encoding"); 
4980 static unsigned long listTypeLength(robj 
*subject
) { 
4981     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4982         return ziplistLen(subject
->ptr
); 
4983     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4984         return listLength((list
*)subject
->ptr
); 
4986         redisPanic("Unknown list encoding"); 
4990 /* Structure to hold list iteration abstraction. */
4993     unsigned char encoding
; 
4994     unsigned char direction
; /* Iteration direction */ 
4999 /* Structure for an entry while iterating over a list. */ 
5001     listTypeIterator 
*li
; 
5002     unsigned char *zi
;  /* Entry in ziplist */ 
5003     listNode 
*ln
;       /* Entry in linked list */ 
5006 /* Initialize an iterator at the specified index. */ 
5007 static listTypeIterator 
*listTypeInitIterator(robj 
*subject
, int index
, unsigned char direction
) { 
5008     listTypeIterator 
*li 
= zmalloc(sizeof(listTypeIterator
)); 
5009     li
->subject 
= subject
; 
5010     li
->encoding 
= subject
->encoding
; 
5011     li
->direction 
= direction
; 
5012     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5013         li
->zi 
= ziplistIndex(subject
->ptr
,index
); 
5014     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5015         li
->ln 
= listIndex(subject
->ptr
,index
); 
5017         redisPanic("Unknown list encoding"); 
5022 /* Clean up the iterator. */ 
5023 static void listTypeReleaseIterator(listTypeIterator 
*li
) { 
5027 /* Stores a pointer to the current entry in the provided entry structure
5028  * and advances the position of the iterator. Returns 1 when the current 
5029  * entry is in fact an entry, 0 otherwise. */ 
5030 static int listTypeNext(listTypeIterator 
*li
, listTypeEntry 
*entry
) { 
5031     /* Protect from converting when iterating */ 
5032     redisAssert(li
->subject
->encoding 
== li
->encoding
); 
5035     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5037         if (entry
->zi 
!= NULL
) { 
5038             if (li
->direction 
== REDIS_TAIL
) 
5039                 li
->zi 
= ziplistNext(li
->subject
->ptr
,li
->zi
); 
5041                 li
->zi 
= ziplistPrev(li
->subject
->ptr
,li
->zi
); 
5044     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5046         if (entry
->ln 
!= NULL
) { 
5047             if (li
->direction 
== REDIS_TAIL
) 
5048                 li
->ln 
= li
->ln
->next
; 
5050                 li
->ln 
= li
->ln
->prev
; 
5054         redisPanic("Unknown list encoding"); 
5059 /* Return entry or NULL at the current position of the iterator. */ 
5060 static robj 
*listTypeGet(listTypeEntry 
*entry
) { 
5061     listTypeIterator 
*li 
= entry
->li
; 
5063     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5064         unsigned char *vstr
; 
5067         redisAssert(entry
->zi 
!= NULL
); 
5068         if (ziplistGet(entry
->zi
,&vstr
,&vlen
,&vlong
)) { 
5070                 value 
= createStringObject((char*)vstr
,vlen
); 
5072                 value 
= createStringObjectFromLongLong(vlong
); 
5075     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5076         redisAssert(entry
->ln 
!= NULL
); 
5077         value 
= listNodeValue(entry
->ln
); 
5078         incrRefCount(value
); 
5080         redisPanic("Unknown list encoding"); 
5085 /* Compare the given object with the entry at the current position. */ 
5086 static int listTypeEqual(listTypeEntry 
*entry
, robj 
*o
) { 
5087     listTypeIterator 
*li 
= entry
->li
; 
5088     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5089         redisAssert(o
->encoding 
== REDIS_ENCODING_RAW
); 
5090         return ziplistCompare(entry
->zi
,o
->ptr
,sdslen(o
->ptr
)); 
5091     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5092         return equalStringObjects(o
,listNodeValue(entry
->ln
)); 
5094         redisPanic("Unknown list encoding"); 
5098 /* Delete the element pointed to. */ 
5099 static void listTypeDelete(listTypeEntry 
*entry
) { 
5100     listTypeIterator 
*li 
= entry
->li
; 
5101     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5102         unsigned char *p 
= entry
->zi
; 
5103         li
->subject
->ptr 
= ziplistDelete(li
->subject
->ptr
,&p
); 
5105         /* Update position of the iterator depending on the direction */ 
5106         if (li
->direction 
== REDIS_TAIL
) 
5109             li
->zi 
= ziplistPrev(li
->subject
->ptr
,p
); 
5110     } else if (entry
->li
->encoding 
== REDIS_ENCODING_LIST
) { 
5112         if (li
->direction 
== REDIS_TAIL
) 
5113             next 
= entry
->ln
->next
; 
5115             next 
= entry
->ln
->prev
; 
5116         listDelNode(li
->subject
->ptr
,entry
->ln
); 
5119         redisPanic("Unknown list encoding"); 
5123 static void listTypeConvert(robj 
*subject
, int enc
) { 
5124     listTypeIterator 
*li
; 
5125     listTypeEntry entry
; 
5126     redisAssert(subject
->type 
== REDIS_LIST
); 
5128     if (enc 
== REDIS_ENCODING_LIST
) { 
5129         list 
*l 
= listCreate(); 
5130         listSetFreeMethod(l
,decrRefCount
); 
5132         /* listTypeGet returns a robj with incremented refcount */ 
5133         li 
= listTypeInitIterator(subject
,0,REDIS_TAIL
); 
5134         while (listTypeNext(li
,&entry
)) listAddNodeTail(l
,listTypeGet(&entry
)); 
5135         listTypeReleaseIterator(li
); 
5137         subject
->encoding 
= REDIS_ENCODING_LIST
; 
5138         zfree(subject
->ptr
); 
5141         redisPanic("Unsupported list conversion"); 
5145 static void pushGenericCommand(redisClient 
*c
, int where
) { 
5146     robj 
*lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5148         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
5149             addReply(c
,shared
.cone
); 
5152         lobj 
= createZiplistObject(); 
5153         dbAdd(c
->db
,c
->argv
[1],lobj
); 
5155         if (lobj
->type 
!= REDIS_LIST
) { 
5156             addReply(c
,shared
.wrongtypeerr
); 
5159         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
5160             addReply(c
,shared
.cone
); 
5164     listTypePush(lobj
,c
->argv
[2],where
); 
5165     addReplyLongLong(c
,listTypeLength(lobj
)); 
5169 static void lpushCommand(redisClient 
*c
) { 
5170     pushGenericCommand(c
,REDIS_HEAD
); 
5173 static void rpushCommand(redisClient 
*c
) { 
5174     pushGenericCommand(c
,REDIS_TAIL
); 
5177 static void llenCommand(redisClient 
*c
) { 
5178     robj 
*o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
); 
5179     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5180     addReplyUlong(c
,listTypeLength(o
)); 
5183 static void lindexCommand(redisClient 
*c
) { 
5184     robj 
*o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
); 
5185     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5186     int index 
= atoi(c
->argv
[2]->ptr
); 
5189     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5191         unsigned char *vstr
; 
5194         p 
= ziplistIndex(o
->ptr
,index
); 
5195         if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
5197                 value 
= createStringObject((char*)vstr
,vlen
); 
5199                 value 
= createStringObjectFromLongLong(vlong
); 
5201             addReplyBulk(c
,value
); 
5202             decrRefCount(value
); 
5204             addReply(c
,shared
.nullbulk
); 
5206     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5207         listNode 
*ln 
= listIndex(o
->ptr
,index
); 
5209             value 
= listNodeValue(ln
); 
5210             addReplyBulk(c
,value
); 
5212             addReply(c
,shared
.nullbulk
); 
5215         redisPanic("Unknown list encoding"); 
5219 static void lsetCommand(redisClient 
*c
) { 
5220     robj 
*o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
); 
5221     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5222     int index 
= atoi(c
->argv
[2]->ptr
); 
5223     robj 
*value 
= c
->argv
[3]; 
5225     listTypeTryConversion(o
,value
); 
5226     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5227         unsigned char *p
, *zl 
= o
->ptr
; 
5228         p 
= ziplistIndex(zl
,index
); 
5230             addReply(c
,shared
.outofrangeerr
); 
5232             o
->ptr 
= ziplistDelete(o
->ptr
,&p
); 
5233             value 
= getDecodedObject(value
); 
5234             o
->ptr 
= ziplistInsert(o
->ptr
,p
,value
->ptr
,sdslen(value
->ptr
)); 
5235             decrRefCount(value
); 
5236             addReply(c
,shared
.ok
); 
5239     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5240         listNode 
*ln 
= listIndex(o
->ptr
,index
); 
5242             addReply(c
,shared
.outofrangeerr
); 
5244             decrRefCount((robj
*)listNodeValue(ln
)); 
5245             listNodeValue(ln
) = value
; 
5246             incrRefCount(value
); 
5247             addReply(c
,shared
.ok
); 
5251         redisPanic("Unknown list encoding"); 
5255 static void popGenericCommand(redisClient 
*c
, int where
) { 
5256     robj 
*o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
); 
5257     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5259     robj 
*value 
= listTypePop(o
,where
); 
5260     if (value 
== NULL
) { 
5261         addReply(c
,shared
.nullbulk
); 
5263         addReplyBulk(c
,value
); 
5264         decrRefCount(value
); 
5265         if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5270 static void lpopCommand(redisClient 
*c
) { 
5271     popGenericCommand(c
,REDIS_HEAD
); 
5274 static void rpopCommand(redisClient 
*c
) { 
5275     popGenericCommand(c
,REDIS_TAIL
); 
5278 static void lrangeCommand(redisClient 
*c
) { 
5280     int start 
= atoi(c
->argv
[2]->ptr
); 
5281     int end 
= atoi(c
->argv
[3]->ptr
); 
5284     listTypeEntry entry
; 
5286     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
5287          || checkType(c
,o
,REDIS_LIST
)) return; 
5288     llen 
= listTypeLength(o
); 
5290     /* convert negative indexes */ 
5291     if (start 
< 0) start 
= llen
+start
; 
5292     if (end 
< 0) end 
= llen
+end
; 
5293     if (start 
< 0) start 
= 0; 
5294     if (end 
< 0) end 
= 0; 
5296     /* indexes sanity checks */ 
5297     if (start 
> end 
|| start 
>= llen
) { 
5298         /* Out of range start or start > end result in empty list */ 
5299         addReply(c
,shared
.emptymultibulk
); 
5302     if (end 
>= llen
) end 
= llen
-1; 
5303     rangelen 
= (end
-start
)+1; 
5305     /* Return the result in form of a multi-bulk reply */ 
5306     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
5307     listTypeIterator 
*li 
= listTypeInitIterator(o
,start
,REDIS_TAIL
); 
5308     for (j 
= 0; j 
< rangelen
; j
++) { 
5309         redisAssert(listTypeNext(li
,&entry
)); 
5310         value 
= listTypeGet(&entry
); 
5311         addReplyBulk(c
,value
); 
5312         decrRefCount(value
); 
5314     listTypeReleaseIterator(li
); 
5317 static void ltrimCommand(redisClient 
*c
) { 
5319     int start 
= atoi(c
->argv
[2]->ptr
); 
5320     int end 
= atoi(c
->argv
[3]->ptr
); 
5322     int j
, ltrim
, rtrim
; 
5326     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL 
|| 
5327         checkType(c
,o
,REDIS_LIST
)) return; 
5328     llen 
= listTypeLength(o
); 
5330     /* convert negative indexes */ 
5331     if (start 
< 0) start 
= llen
+start
; 
5332     if (end 
< 0) end 
= llen
+end
; 
5333     if (start 
< 0) start 
= 0; 
5334     if (end 
< 0) end 
= 0; 
5336     /* indexes sanity checks */ 
5337     if (start 
> end 
|| start 
>= llen
) { 
5338         /* Out of range start or start > end result in empty list */ 
5342         if (end 
>= llen
) end 
= llen
-1; 
5347     /* Remove list elements to perform the trim */ 
5348     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5349         o
->ptr 
= ziplistDeleteRange(o
->ptr
,0,ltrim
); 
5350         o
->ptr 
= ziplistDeleteRange(o
->ptr
,-rtrim
,rtrim
); 
5351     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5353         for (j 
= 0; j 
< ltrim
; j
++) { 
5354             ln 
= listFirst(list
); 
5355             listDelNode(list
,ln
); 
5357         for (j 
= 0; j 
< rtrim
; j
++) { 
5358             ln 
= listLast(list
); 
5359             listDelNode(list
,ln
); 
5362         redisPanic("Unknown list encoding"); 
5364     if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5366     addReply(c
,shared
.ok
); 
5369 static void lremCommand(redisClient 
*c
) { 
5370     robj 
*subject
, *obj 
= c
->argv
[3]; 
5371     int toremove 
= atoi(c
->argv
[2]->ptr
); 
5373     listTypeEntry entry
; 
5375     subject 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
); 
5376     if (subject 
== NULL 
|| checkType(c
,subject
,REDIS_LIST
)) return; 
5378     /* Make sure obj is raw when we're dealing with a ziplist */ 
5379     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) 
5380         obj 
= getDecodedObject(obj
); 
5382     listTypeIterator 
*li
; 
5384         toremove 
= -toremove
; 
5385         li 
= listTypeInitIterator(subject
,-1,REDIS_HEAD
); 
5387         li 
= listTypeInitIterator(subject
,0,REDIS_TAIL
); 
5390     while (listTypeNext(li
,&entry
)) { 
5391         if (listTypeEqual(&entry
,obj
)) { 
5392             listTypeDelete(&entry
); 
5395             if (toremove 
&& removed 
== toremove
) break; 
5398     listTypeReleaseIterator(li
); 
5400     /* Clean up raw encoded object */ 
5401     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) 
5404     if (listTypeLength(subject
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5405     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
5408 /* This is the semantic of this command: 
5409  *  RPOPLPUSH srclist dstlist: 
5410  *   IF LLEN(srclist) > 0 
5411  *     element = RPOP srclist 
5412  *     LPUSH dstlist element 
5419  * The idea is to be able to get an element from a list in a reliable way 
5420  * since the element is not just returned but pushed against another list 
5421  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
5423 static void rpoplpushcommand(redisClient 
*c
) { 
5425     if ((sobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5426         checkType(c
,sobj
,REDIS_LIST
)) return; 
5428     if (listTypeLength(sobj
) == 0) { 
5429         addReply(c
,shared
.nullbulk
); 
5431         robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
5432         if (dobj 
&& checkType(c
,dobj
,REDIS_LIST
)) return; 
5433         value 
= listTypePop(sobj
,REDIS_TAIL
); 
5435         /* Add the element to the target list (unless it's directly 
5436          * passed to some BLPOP-ing client */ 
5437         if (!handleClientsWaitingListPush(c
,c
->argv
[2],value
)) { 
5438             /* Create the list if the key does not exist */ 
5440                 dobj 
= createZiplistObject(); 
5441                 dbAdd(c
->db
,c
->argv
[2],dobj
); 
5443             listTypePush(dobj
,value
,REDIS_HEAD
); 
5446         /* Send the element to the client as reply as well */ 
5447         addReplyBulk(c
,value
); 
5449         /* listTypePop returns an object with its refcount incremented */ 
5450         decrRefCount(value
); 
5452         /* Delete the source list when it is empty */ 
5453         if (listTypeLength(sobj
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5458 /* ==================================== Sets ================================ */ 
5460 static void saddCommand(redisClient 
*c
) { 
5463     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5465         set 
= createSetObject(); 
5466         dbAdd(c
->db
,c
->argv
[1],set
); 
5468         if (set
->type 
!= REDIS_SET
) { 
5469             addReply(c
,shared
.wrongtypeerr
); 
5473     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
5474         incrRefCount(c
->argv
[2]); 
5476         addReply(c
,shared
.cone
); 
5478         addReply(c
,shared
.czero
); 
5482 static void sremCommand(redisClient 
*c
) { 
5485     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5486         checkType(c
,set
,REDIS_SET
)) return; 
5488     if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
5490         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5491         if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5492         addReply(c
,shared
.cone
); 
5494         addReply(c
,shared
.czero
); 
5498 static void smoveCommand(redisClient 
*c
) { 
5499     robj 
*srcset
, *dstset
; 
5501     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5502     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
5504     /* If the source key does not exist return 0, if it's of the wrong type 
5506     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
5507         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
5510     /* Error if the destination key is not a set as well */ 
5511     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
5512         addReply(c
,shared
.wrongtypeerr
); 
5515     /* Remove the element from the source set */ 
5516     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
5517         /* Key not found in the src set! return zero */ 
5518         addReply(c
,shared
.czero
); 
5521     if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset 
!= dstset
) 
5522         dbDelete(c
->db
,c
->argv
[1]); 
5524     /* Add the element to the destination set */ 
5526         dstset 
= createSetObject(); 
5527         dbAdd(c
->db
,c
->argv
[2],dstset
); 
5529     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
5530         incrRefCount(c
->argv
[3]); 
5531     addReply(c
,shared
.cone
); 
5534 static void sismemberCommand(redisClient 
*c
) { 
5537     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5538         checkType(c
,set
,REDIS_SET
)) return; 
5540     if (dictFind(set
->ptr
,c
->argv
[2])) 
5541         addReply(c
,shared
.cone
); 
5543         addReply(c
,shared
.czero
); 
5546 static void scardCommand(redisClient 
*c
) { 
5550     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5551         checkType(c
,o
,REDIS_SET
)) return; 
5554     addReplyUlong(c
,dictSize(s
)); 
5557 static void spopCommand(redisClient 
*c
) { 
5561     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5562         checkType(c
,set
,REDIS_SET
)) return; 
5564     de 
= dictGetRandomKey(set
->ptr
); 
5566         addReply(c
,shared
.nullbulk
); 
5568         robj 
*ele 
= dictGetEntryKey(de
); 
5570         addReplyBulk(c
,ele
); 
5571         dictDelete(set
->ptr
,ele
); 
5572         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5573         if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5578 static void srandmemberCommand(redisClient 
*c
) { 
5582     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5583         checkType(c
,set
,REDIS_SET
)) return; 
5585     de 
= dictGetRandomKey(set
->ptr
); 
5587         addReply(c
,shared
.nullbulk
); 
5589         robj 
*ele 
= dictGetEntryKey(de
); 
5591         addReplyBulk(c
,ele
); 
5595 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
5596     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
5598     return dictSize(*d1
)-dictSize(*d2
); 
5601 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
5602     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5605     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
5606     unsigned long j
, cardinality 
= 0; 
5608     for (j 
= 0; j 
< setsnum
; j
++) { 
5612                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5613                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5617                 if (dbDelete(c
->db
,dstkey
)) 
5619                 addReply(c
,shared
.czero
); 
5621                 addReply(c
,shared
.emptymultibulk
); 
5625         if (setobj
->type 
!= REDIS_SET
) { 
5627             addReply(c
,shared
.wrongtypeerr
); 
5630         dv
[j
] = setobj
->ptr
; 
5632     /* Sort sets from the smallest to largest, this will improve our 
5633      * algorithm's performace */ 
5634     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
5636     /* The first thing we should output is the total number of elements... 
5637      * since this is a multi-bulk write, but at this stage we don't know 
5638      * the intersection set size, so we use a trick, append an empty object 
5639      * to the output list and save the pointer to later modify it with the 
5642         lenobj 
= createObject(REDIS_STRING
,NULL
); 
5644         decrRefCount(lenobj
); 
5646         /* If we have a target key where to store the resulting set 
5647          * create this key with an empty set inside */ 
5648         dstset 
= createSetObject(); 
5651     /* Iterate all the elements of the first (smallest) set, and test 
5652      * the element against all the other sets, if at least one set does 
5653      * not include the element it is discarded */ 
5654     di 
= dictGetIterator(dv
[0]); 
5656     while((de 
= dictNext(di
)) != NULL
) { 
5659         for (j 
= 1; j 
< setsnum
; j
++) 
5660             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
5662             continue; /* at least one set does not contain the member */ 
5663         ele 
= dictGetEntryKey(de
); 
5665             addReplyBulk(c
,ele
); 
5668             dictAdd(dstset
->ptr
,ele
,NULL
); 
5672     dictReleaseIterator(di
); 
5675         /* Store the resulting set into the target, if the intersection 
5676          * is not an empty set. */ 
5677         dbDelete(c
->db
,dstkey
); 
5678         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5679             dbAdd(c
->db
,dstkey
,dstset
); 
5680             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5682             decrRefCount(dstset
); 
5683             addReply(c
,shared
.czero
); 
5687         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
5692 static void sinterCommand(redisClient 
*c
) { 
5693     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
5696 static void sinterstoreCommand(redisClient 
*c
) { 
5697     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
5700 #define REDIS_OP_UNION 0 
5701 #define REDIS_OP_DIFF 1 
5702 #define REDIS_OP_INTER 2 
5704 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
5705     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5708     robj 
*dstset 
= NULL
; 
5709     int j
, cardinality 
= 0; 
5711     for (j 
= 0; j 
< setsnum
; j
++) { 
5715                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5716                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5721         if (setobj
->type 
!= REDIS_SET
) { 
5723             addReply(c
,shared
.wrongtypeerr
); 
5726         dv
[j
] = setobj
->ptr
; 
5729     /* We need a temp set object to store our union. If the dstkey 
5730      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
5731      * this set object will be the resulting object to set into the target key*/ 
5732     dstset 
= createSetObject(); 
5734     /* Iterate all the elements of all the sets, add every element a single 
5735      * time to the result set */ 
5736     for (j 
= 0; j 
< setsnum
; j
++) { 
5737         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
5738         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
5740         di 
= dictGetIterator(dv
[j
]); 
5742         while((de 
= dictNext(di
)) != NULL
) { 
5745             /* dictAdd will not add the same element multiple times */ 
5746             ele 
= dictGetEntryKey(de
); 
5747             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
5748                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
5752             } else if (op 
== REDIS_OP_DIFF
) { 
5753                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
5758         dictReleaseIterator(di
); 
5760         /* result set is empty? Exit asap. */ 
5761         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; 
5764     /* Output the content of the resulting set, if not in STORE mode */ 
5766         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
5767         di 
= dictGetIterator(dstset
->ptr
); 
5768         while((de 
= dictNext(di
)) != NULL
) { 
5771             ele 
= dictGetEntryKey(de
); 
5772             addReplyBulk(c
,ele
); 
5774         dictReleaseIterator(di
); 
5775         decrRefCount(dstset
); 
5777         /* If we have a target key where to store the resulting set 
5778          * create this key with the result set inside */ 
5779         dbDelete(c
->db
,dstkey
); 
5780         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5781             dbAdd(c
->db
,dstkey
,dstset
); 
5782             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5784             decrRefCount(dstset
); 
5785             addReply(c
,shared
.czero
); 
5792 static void sunionCommand(redisClient 
*c
) { 
5793     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
5796 static void sunionstoreCommand(redisClient 
*c
) { 
5797     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
5800 static void sdiffCommand(redisClient 
*c
) { 
5801     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
5804 static void sdiffstoreCommand(redisClient 
*c
) { 
5805     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
5808 /* ==================================== ZSets =============================== */ 
5810 /* ZSETs are ordered sets using two data structures to hold the same elements 
5811  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
5814  * The elements are added to an hash table mapping Redis objects to scores. 
5815  * At the same time the elements are added to a skip list mapping scores 
5816  * to Redis objects (so objects are sorted by scores in this "view"). */ 
5818 /* This skiplist implementation is almost a C translation of the original 
5819  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
5820  * Alternative to Balanced Trees", modified in three ways: 
5821  * a) this implementation allows for repeated values. 
5822  * b) the comparison is not just by key (our 'score') but by satellite data. 
5823  * c) there is a back pointer, so it's a doubly linked list with the back 
5824  * pointers being only at "level 1". This allows to traverse the list 
5825  * from tail to head, useful for ZREVRANGE. */ 
5827 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
5828     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
5830     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
5832         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
5840 static zskiplist 
*zslCreate(void) { 
5844     zsl 
= zmalloc(sizeof(*zsl
)); 
5847     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
5848     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
5849         zsl
->header
->forward
[j
] = NULL
; 
5851         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
5852         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
5853             zsl
->header
->span
[j
] = 0; 
5855     zsl
->header
->backward 
= NULL
; 
5860 static void zslFreeNode(zskiplistNode 
*node
) { 
5861     decrRefCount(node
->obj
); 
5862     zfree(node
->forward
); 
5867 static void zslFree(zskiplist 
*zsl
) { 
5868     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
5870     zfree(zsl
->header
->forward
); 
5871     zfree(zsl
->header
->span
); 
5874         next 
= node
->forward
[0]; 
5881 static int zslRandomLevel(void) { 
5883     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
5885     return (level
<ZSKIPLIST_MAXLEVEL
) ? level 
: ZSKIPLIST_MAXLEVEL
; 
5888 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5889     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5890     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
5894     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5895         /* store rank that is crossed to reach the insert position */ 
5896         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
5898         while (x
->forward
[i
] && 
5899             (x
->forward
[i
]->score 
< score 
|| 
5900                 (x
->forward
[i
]->score 
== score 
&& 
5901                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
5902             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
5907     /* we assume the key is not already inside, since we allow duplicated 
5908      * scores, and the re-insertion of score and redis object should never 
5909      * happpen since the caller of zslInsert() should test in the hash table 
5910      * if the element is already inside or not. */ 
5911     level 
= zslRandomLevel(); 
5912     if (level 
> zsl
->level
) { 
5913         for (i 
= zsl
->level
; i 
< level
; i
++) { 
5915             update
[i
] = zsl
->header
; 
5916             update
[i
]->span
[i
-1] = zsl
->length
; 
5920     x 
= zslCreateNode(level
,score
,obj
); 
5921     for (i 
= 0; i 
< level
; i
++) { 
5922         x
->forward
[i
] = update
[i
]->forward
[i
]; 
5923         update
[i
]->forward
[i
] = x
; 
5925         /* update span covered by update[i] as x is inserted here */ 
5927             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
5928             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
5932     /* increment span for untouched levels */ 
5933     for (i 
= level
; i 
< zsl
->level
; i
++) { 
5934         update
[i
]->span
[i
-1]++; 
5937     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
5939         x
->forward
[0]->backward 
= x
; 
5945 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ 
5946 void zslDeleteNode(zskiplist 
*zsl
, zskiplistNode 
*x
, zskiplistNode 
**update
) { 
5948     for (i 
= 0; i 
< zsl
->level
; i
++) { 
5949         if (update
[i
]->forward
[i
] == x
) { 
5951                 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5953             update
[i
]->forward
[i
] = x
->forward
[i
]; 
5955             /* invariant: i > 0, because update[0]->forward[0] 
5956              * is always equal to x */ 
5957             update
[i
]->span
[i
-1] -= 1; 
5960     if (x
->forward
[0]) { 
5961         x
->forward
[0]->backward 
= x
->backward
; 
5963         zsl
->tail 
= x
->backward
; 
5965     while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
5970 /* Delete an element with matching score/object from the skiplist. */ 
5971 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5972     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5976     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5977         while (x
->forward
[i
] && 
5978             (x
->forward
[i
]->score 
< score 
|| 
5979                 (x
->forward
[i
]->score 
== score 
&& 
5980                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
5984     /* We may have multiple elements with the same score, what we need 
5985      * is to find the element with both the right score and object. */ 
5987     if (x 
&& score 
== x
->score 
&& equalStringObjects(x
->obj
,obj
)) { 
5988         zslDeleteNode(zsl
, x
, update
); 
5992         return 0; /* not found */ 
5994     return 0; /* not found */ 
5997 /* Delete all the elements with score between min and max from the skiplist. 
5998  * Min and mx are inclusive, so a score >= min || score <= max is deleted. 
5999  * Note that this function takes the reference to the hash table view of the 
6000  * sorted set, in order to remove the elements from the hash table too. */ 
6001 static unsigned long zslDeleteRangeByScore(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
6002     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
6003     unsigned long removed 
= 0; 
6007     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6008         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
6012     /* We may have multiple elements with the same score, what we need 
6013      * is to find the element with both the right score and object. */ 
6015     while (x 
&& x
->score 
<= max
) { 
6016         zskiplistNode 
*next 
= x
->forward
[0]; 
6017         zslDeleteNode(zsl
, x
, update
); 
6018         dictDelete(dict
,x
->obj
); 
6023     return removed
; /* not found */ 
6026 /* Delete all the elements with rank between start and end from the skiplist. 
6027  * Start and end are inclusive. Note that start and end need to be 1-based */ 
6028 static unsigned long zslDeleteRangeByRank(zskiplist 
*zsl
, unsigned int start
, unsigned int end
, dict 
*dict
) { 
6029     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
6030     unsigned long traversed 
= 0, removed 
= 0; 
6034     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6035         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) < start
) { 
6036             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6044     while (x 
&& traversed 
<= end
) { 
6045         zskiplistNode 
*next 
= x
->forward
[0]; 
6046         zslDeleteNode(zsl
, x
, update
); 
6047         dictDelete(dict
,x
->obj
); 
6056 /* Find the first node having a score equal or greater than the specified one. 
6057  * Returns NULL if there is no match. */ 
6058 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
6063     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6064         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
6067     /* We may have multiple elements with the same score, what we need 
6068      * is to find the element with both the right score and object. */ 
6069     return x
->forward
[0]; 
6072 /* Find the rank for an element by both score and key. 
6073  * Returns 0 when the element cannot be found, rank otherwise. 
6074  * Note that the rank is 1-based due to the span of zsl->header to the 
6076 static unsigned long zslistTypeGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
6078     unsigned long rank 
= 0; 
6082     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6083         while (x
->forward
[i
] && 
6084             (x
->forward
[i
]->score 
< score 
|| 
6085                 (x
->forward
[i
]->score 
== score 
&& 
6086                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
6087             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6091         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
6092         if (x
->obj 
&& equalStringObjects(x
->obj
,o
)) { 
6099 /* Finds an element by its rank. The rank argument needs to be 1-based. */ 
6100 zskiplistNode
* zslistTypeGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
6102     unsigned long traversed 
= 0; 
6106     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6107         while (x
->forward
[i
] && (traversed 
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
) 
6109             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6112         if (traversed 
== rank
) { 
6119 /* The actual Z-commands implementations */ 
6121 /* This generic command implements both ZADD and ZINCRBY. 
6122  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
6123  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
6124 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
6129     if (isnan(scoreval
)) { 
6130         addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n")); 
6134     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
6135     if (zsetobj 
== NULL
) { 
6136         zsetobj 
= createZsetObject(); 
6137         dbAdd(c
->db
,key
,zsetobj
); 
6139         if (zsetobj
->type 
!= REDIS_ZSET
) { 
6140             addReply(c
,shared
.wrongtypeerr
); 
6146     /* Ok now since we implement both ZADD and ZINCRBY here the code 
6147      * needs to handle the two different conditions. It's all about setting 
6148      * '*score', that is, the new score to set, to the right value. */ 
6149     score 
= zmalloc(sizeof(double)); 
6153         /* Read the old score. If the element was not present starts from 0 */ 
6154         de 
= dictFind(zs
->dict
,ele
); 
6156             double *oldscore 
= dictGetEntryVal(de
); 
6157             *score 
= *oldscore 
+ scoreval
; 
6161         if (isnan(*score
)) { 
6163                 sdsnew("-ERR resulting score is Not A Number (nan)\r\n")); 
6165             /* Note that we don't need to check if the zset may be empty and 
6166              * should be removed here, as we can only obtain Nan as score if 
6167              * there was already an element in the sorted set. */ 
6174     /* What follows is a simple remove and re-insert operation that is common 
6175      * to both ZADD and ZINCRBY... */ 
6176     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
6177         /* case 1: New element */ 
6178         incrRefCount(ele
); /* added to hash */ 
6179         zslInsert(zs
->zsl
,*score
,ele
); 
6180         incrRefCount(ele
); /* added to skiplist */ 
6183             addReplyDouble(c
,*score
); 
6185             addReply(c
,shared
.cone
); 
6190         /* case 2: Score update operation */ 
6191         de 
= dictFind(zs
->dict
,ele
); 
6192         redisAssert(de 
!= NULL
); 
6193         oldscore 
= dictGetEntryVal(de
); 
6194         if (*score 
!= *oldscore
) { 
6197             /* Remove and insert the element in the skip list with new score */ 
6198             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
6199             redisAssert(deleted 
!= 0); 
6200             zslInsert(zs
->zsl
,*score
,ele
); 
6202             /* Update the score in the hash table */ 
6203             dictReplace(zs
->dict
,ele
,score
); 
6209             addReplyDouble(c
,*score
); 
6211             addReply(c
,shared
.czero
); 
6215 static void zaddCommand(redisClient 
*c
) { 
6218     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
6219     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
6222 static void zincrbyCommand(redisClient 
*c
) { 
6225     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
6226     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
6229 static void zremCommand(redisClient 
*c
) { 
6236     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6237         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6240     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6242         addReply(c
,shared
.czero
); 
6245     /* Delete from the skiplist */ 
6246     oldscore 
= dictGetEntryVal(de
); 
6247     deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
6248     redisAssert(deleted 
!= 0); 
6250     /* Delete from the hash table */ 
6251     dictDelete(zs
->dict
,c
->argv
[2]); 
6252     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6253     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6255     addReply(c
,shared
.cone
); 
6258 static void zremrangebyscoreCommand(redisClient 
*c
) { 
6265     if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) || 
6266         (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return; 
6268     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6269         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6272     deleted 
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
); 
6273     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6274     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6275     server
.dirty 
+= deleted
; 
6276     addReplyLongLong(c
,deleted
); 
6279 static void zremrangebyrankCommand(redisClient 
*c
) { 
6287     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
6288         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
6290     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6291         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6293     llen 
= zs
->zsl
->length
; 
6295     /* convert negative indexes */ 
6296     if (start 
< 0) start 
= llen
+start
; 
6297     if (end 
< 0) end 
= llen
+end
; 
6298     if (start 
< 0) start 
= 0; 
6299     if (end 
< 0) end 
= 0; 
6301     /* indexes sanity checks */ 
6302     if (start 
> end 
|| start 
>= llen
) { 
6303         addReply(c
,shared
.czero
); 
6306     if (end 
>= llen
) end 
= llen
-1; 
6308     /* increment start and end because zsl*Rank functions 
6309      * use 1-based rank */ 
6310     deleted 
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
); 
6311     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6312     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6313     server
.dirty 
+= deleted
; 
6314     addReplyLongLong(c
, deleted
); 
6322 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) { 
6323     zsetopsrc 
*d1 
= (void*) s1
, *d2 
= (void*) s2
; 
6324     unsigned long size1
, size2
; 
6325     size1 
= d1
->dict 
? dictSize(d1
->dict
) : 0; 
6326     size2 
= d2
->dict 
? dictSize(d2
->dict
) : 0; 
6327     return size1 
- size2
; 
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3
#define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))

/* Fold 'val' into '*target' according to the aggregate mode: sum, minimum
 * or maximum. Any other mode is a programming error and panics. */
inline static void zunionInterAggregate(double *target, double val, int aggregate) {
    switch (aggregate) {
    case REDIS_AGGR_SUM:
        *target = *target + val;
        break;
    case REDIS_AGGR_MIN:
        if (val < *target) *target = val;
        break;
    case REDIS_AGGR_MAX:
        if (val > *target) *target = val;
        break;
    default:
        /* safety net */
        redisPanic("Unknown ZUNION/INTER aggregate type");
    }
}
6348 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
6350     int aggregate 
= REDIS_AGGR_SUM
; 
6357     /* expect setnum input keys to be given */ 
6358     setnum 
= atoi(c
->argv
[2]->ptr
); 
6360         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n")); 
6364     /* test if the expected number of keys would overflow */ 
6365     if (3+setnum 
> c
->argc
) { 
6366         addReply(c
,shared
.syntaxerr
); 
6370     /* read keys to be used for input */ 
6371     src 
= zmalloc(sizeof(zsetopsrc
) * setnum
); 
6372     for (i 
= 0, j 
= 3; i 
< setnum
; i
++, j
++) { 
6373         robj 
*obj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
6377             if (obj
->type 
== REDIS_ZSET
) { 
6378                 src
[i
].dict 
= ((zset
*)obj
->ptr
)->dict
; 
6379             } else if (obj
->type 
== REDIS_SET
) { 
6380                 src
[i
].dict 
= (obj
->ptr
); 
6383                 addReply(c
,shared
.wrongtypeerr
); 
6388         /* default all weights to 1 */ 
6389         src
[i
].weight 
= 1.0; 
6392     /* parse optional extra arguments */ 
6394         int remaining 
= c
->argc 
- j
; 
6397             if (remaining 
>= (setnum 
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
6399                 for (i 
= 0; i 
< setnum
; i
++, j
++, remaining
--) { 
6400                     if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
) 
6403             } else if (remaining 
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) { 
6405                 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) { 
6406                     aggregate 
= REDIS_AGGR_SUM
; 
6407                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) { 
6408                     aggregate 
= REDIS_AGGR_MIN
; 
6409                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) { 
6410                     aggregate 
= REDIS_AGGR_MAX
; 
6413                     addReply(c
,shared
.syntaxerr
); 
6419                 addReply(c
,shared
.syntaxerr
); 
6425     /* sort sets from the smallest to largest, this will improve our 
6426      * algorithm's performance */ 
6427     qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
); 
6429     dstobj 
= createZsetObject(); 
6430     dstzset 
= dstobj
->ptr
; 
6432     if (op 
== REDIS_OP_INTER
) { 
6433         /* skip going over all entries if the smallest zset is NULL or empty */ 
6434         if (src
[0].dict 
&& dictSize(src
[0].dict
) > 0) { 
6435             /* precondition: as src[0].dict is non-empty and the zsets are ordered 
6436              * from small to large, all src[i > 0].dict are non-empty too */ 
6437             di 
= dictGetIterator(src
[0].dict
); 
6438             while((de 
= dictNext(di
)) != NULL
) { 
6439                 double *score 
= zmalloc(sizeof(double)), value
; 
6440                 *score 
= src
[0].weight 
* zunionInterDictValue(de
); 
6442                 for (j 
= 1; j 
< setnum
; j
++) { 
6443                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
6445                         value 
= src
[j
].weight 
* zunionInterDictValue(other
); 
6446                         zunionInterAggregate(score
, value
, aggregate
); 
6452                 /* skip entry when not present in every source dict */ 
6456                     robj 
*o 
= dictGetEntryKey(de
); 
6457                     dictAdd(dstzset
->dict
,o
,score
); 
6458                     incrRefCount(o
); /* added to dictionary */ 
6459                     zslInsert(dstzset
->zsl
,*score
,o
); 
6460                     incrRefCount(o
); /* added to skiplist */ 
6463             dictReleaseIterator(di
); 
6465     } else if (op 
== REDIS_OP_UNION
) { 
6466         for (i 
= 0; i 
< setnum
; i
++) { 
6467             if (!src
[i
].dict
) continue; 
6469             di 
= dictGetIterator(src
[i
].dict
); 
6470             while((de 
= dictNext(di
)) != NULL
) { 
6471                 /* skip key when already processed */ 
6472                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
6474                 double *score 
= zmalloc(sizeof(double)), value
; 
6475                 *score 
= src
[i
].weight 
* zunionInterDictValue(de
); 
6477                 /* because the zsets are sorted by size, its only possible 
6478                  * for sets at larger indices to hold this entry */ 
6479                 for (j 
= (i
+1); j 
< setnum
; j
++) { 
6480                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
6482                         value 
= src
[j
].weight 
* zunionInterDictValue(other
); 
6483                         zunionInterAggregate(score
, value
, aggregate
); 
6487                 robj 
*o 
= dictGetEntryKey(de
); 
6488                 dictAdd(dstzset
->dict
,o
,score
); 
6489                 incrRefCount(o
); /* added to dictionary */ 
6490                 zslInsert(dstzset
->zsl
,*score
,o
); 
6491                 incrRefCount(o
); /* added to skiplist */ 
6493             dictReleaseIterator(di
); 
6496         /* unknown operator */ 
6497         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
6500     dbDelete(c
->db
,dstkey
); 
6501     if (dstzset
->zsl
->length
) { 
6502         dbAdd(c
->db
,dstkey
,dstobj
); 
6503         addReplyLongLong(c
, dstzset
->zsl
->length
); 
6506         decrRefCount(dstobj
); 
6507         addReply(c
, shared
.czero
); 
6512 static void zunionstoreCommand(redisClient 
*c
) { 
6513     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
6516 static void zinterstoreCommand(redisClient 
*c
) { 
6517     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
6520 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
6532     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
6533         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
6535     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
6537     } else if (c
->argc 
>= 5) { 
6538         addReply(c
,shared
.syntaxerr
); 
6542     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
6543          || checkType(c
,o
,REDIS_ZSET
)) return; 
6548     /* convert negative indexes */ 
6549     if (start 
< 0) start 
= llen
+start
; 
6550     if (end 
< 0) end 
= llen
+end
; 
6551     if (start 
< 0) start 
= 0; 
6552     if (end 
< 0) end 
= 0; 
6554     /* indexes sanity checks */ 
6555     if (start 
> end 
|| start 
>= llen
) { 
6556         /* Out of range start or start > end result in empty list */ 
6557         addReply(c
,shared
.emptymultibulk
); 
6560     if (end 
>= llen
) end 
= llen
-1; 
6561     rangelen 
= (end
-start
)+1; 
6563     /* check if starting point is trivial, before searching 
6564      * the element in log(N) time */ 
6566         ln 
= start 
== 0 ? zsl
->tail 
: zslistTypeGetElementByRank(zsl
, llen
-start
); 
6569             zsl
->header
->forward
[0] : zslistTypeGetElementByRank(zsl
, start
+1); 
6572     /* Return the result in form of a multi-bulk reply */ 
6573     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
6574         withscores 
? (rangelen
*2) : rangelen
)); 
6575     for (j 
= 0; j 
< rangelen
; j
++) { 
6577         addReplyBulk(c
,ele
); 
6579             addReplyDouble(c
,ln
->score
); 
6580         ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
6584 static void zrangeCommand(redisClient 
*c
) { 
6585     zrangeGenericCommand(c
,0); 
6588 static void zrevrangeCommand(redisClient 
*c
) { 
6589     zrangeGenericCommand(c
,1); 
6592 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
6593  * If justcount is non-zero, just the count is returned. */ 
6594 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
6597     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
6598     int offset 
= 0, limit 
= -1; 
6602     /* Parse the min-max interval. If one of the values is prefixed 
6603      * by the "(" character, it's considered "open". For instance 
6604      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
6605      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
6606     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
6607         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
6610         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
6612     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
6613         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
6616         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
6619     /* Parse "WITHSCORES": note that if the command was called with 
6620      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
6621      * enter the following paths to parse WITHSCORES and LIMIT. */ 
6622     if (c
->argc 
== 5 || c
->argc 
== 8) { 
6623         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
6628     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
6632             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
6637     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
6638         addReply(c
,shared
.syntaxerr
); 
6640     } else if (c
->argc 
== (7 + withscores
)) { 
6641         offset 
= atoi(c
->argv
[5]->ptr
); 
6642         limit 
= atoi(c
->argv
[6]->ptr
); 
6643         if (offset 
< 0) offset 
= 0; 
6646     /* Ok, lookup the key and get the range */ 
6647     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6649         addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6651         if (o
->type 
!= REDIS_ZSET
) { 
6652             addReply(c
,shared
.wrongtypeerr
); 
6654             zset 
*zsetobj 
= o
->ptr
; 
6655             zskiplist 
*zsl 
= zsetobj
->zsl
; 
6657             robj 
*ele
, *lenobj 
= NULL
; 
6658             unsigned long rangelen 
= 0; 
6660             /* Get the first node with the score >= min, or with 
6661              * score > min if 'minex' is true. */ 
6662             ln 
= zslFirstWithScore(zsl
,min
); 
6663             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
6666                 /* No element matching the specified interval */ 
6667                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6671             /* We don't know in advance how many matching elements there 
6672              * are in the list, so we push this object that will represent 
6673              * the multi-bulk length in the output buffer, and will "fix" 
6676                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
6678                 decrRefCount(lenobj
); 
6681             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
6684                     ln 
= ln
->forward
[0]; 
6687                 if (limit 
== 0) break; 
6690                     addReplyBulk(c
,ele
); 
6692                         addReplyDouble(c
,ln
->score
); 
6694                 ln 
= ln
->forward
[0]; 
6696                 if (limit 
> 0) limit
--; 
6699                 addReplyLongLong(c
,(long)rangelen
); 
6701                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
6702                      withscores 
? (rangelen
*2) : rangelen
); 
6708 static void zrangebyscoreCommand(redisClient 
*c
) { 
6709     genericZrangebyscoreCommand(c
,0); 
6712 static void zcountCommand(redisClient 
*c
) { 
6713     genericZrangebyscoreCommand(c
,1); 
6716 static void zcardCommand(redisClient 
*c
) { 
6720     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6721         checkType(c
,o
,REDIS_ZSET
)) return; 
6724     addReplyUlong(c
,zs
->zsl
->length
); 
6727 static void zscoreCommand(redisClient 
*c
) { 
6732     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6733         checkType(c
,o
,REDIS_ZSET
)) return; 
6736     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6738         addReply(c
,shared
.nullbulk
); 
6740         double *score 
= dictGetEntryVal(de
); 
6742         addReplyDouble(c
,*score
); 
6746 static void zrankGenericCommand(redisClient 
*c
, int reverse
) { 
6754     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6755         checkType(c
,o
,REDIS_ZSET
)) return; 
6759     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6761         addReply(c
,shared
.nullbulk
); 
6765     score 
= dictGetEntryVal(de
); 
6766     rank 
= zslistTypeGetRank(zsl
, *score
, c
->argv
[2]); 
6769             addReplyLongLong(c
, zsl
->length 
- rank
); 
6771             addReplyLongLong(c
, rank
-1); 
6774         addReply(c
,shared
.nullbulk
); 
6778 static void zrankCommand(redisClient 
*c
) { 
6779     zrankGenericCommand(c
, 0); 
6782 static void zrevrankCommand(redisClient 
*c
) { 
6783     zrankGenericCommand(c
, 1); 
6786 /* ========================= Hashes utility functions ======================= */ 
6787 #define REDIS_HASH_KEY 1 
6788 #define REDIS_HASH_VALUE 2 
6790 /* Check the length of a number of objects to see if we need to convert a 
6791  * zipmap to a real hash. Note that we only check string encoded objects 
6792  * as their string length can be queried in constant time. */ 
6793 static void hashTypeTryConversion(robj 
*subject
, robj 
**argv
, int start
, int end
) { 
6795     if (subject
->encoding 
!= REDIS_ENCODING_ZIPMAP
) return; 
6797     for (i 
= start
; i 
<= end
; i
++) { 
6798         if (argv
[i
]->encoding 
== REDIS_ENCODING_RAW 
&& 
6799             sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) 
6801             convertToRealHash(subject
); 
6807 /* Encode given objects in-place when the hash uses a dict. */ 
6808 static void hashTypeTryObjectEncoding(robj 
*subject
, robj 
**o1
, robj 
**o2
) { 
6809     if (subject
->encoding 
== REDIS_ENCODING_HT
) { 
6810         if (o1
) *o1 
= tryObjectEncoding(*o1
); 
6811         if (o2
) *o2 
= tryObjectEncoding(*o2
); 
6815 /* Get the value from a hash identified by key. Returns either a string 
6816  * object or NULL if the value cannot be found. The refcount of the object 
6817  * is always increased by 1 when the value was found. */ 
6818 static robj 
*hashTypeGet(robj 
*o
, robj 
*key
) { 
6820     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6823         key 
= getDecodedObject(key
); 
6824         if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) { 
6825             value 
= createStringObject((char*)v
,vlen
); 
6829         dictEntry 
*de 
= dictFind(o
->ptr
,key
); 
6831             value 
= dictGetEntryVal(de
); 
6832             incrRefCount(value
); 
6838 /* Test if the key exists in the given hash. Returns 1 if the key 
6839  * exists and 0 when it doesn't. */ 
6840 static int hashTypeExists(robj 
*o
, robj 
*key
) { 
6841     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6842         key 
= getDecodedObject(key
); 
6843         if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) { 
6849         if (dictFind(o
->ptr
,key
) != NULL
) { 
6856 /* Add an element, discard the old if the key already exists. 
6857  * Return 0 on insert and 1 on update. */ 
6858 static int hashTypeSet(robj 
*o
, robj 
*key
, robj 
*value
) { 
6860     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6861         key 
= getDecodedObject(key
); 
6862         value 
= getDecodedObject(value
); 
6863         o
->ptr 
= zipmapSet(o
->ptr
, 
6864             key
->ptr
,sdslen(key
->ptr
), 
6865             value
->ptr
,sdslen(value
->ptr
), &update
); 
6867         decrRefCount(value
); 
6869         /* Check if the zipmap needs to be upgraded to a real hash table */ 
6870         if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
) 
6871             convertToRealHash(o
); 
6873         if (dictReplace(o
->ptr
,key
,value
)) { 
6880         incrRefCount(value
); 
6885 /* Delete an element from a hash. 
6886  * Return 1 on deleted and 0 on not found. */ 
6887 static int hashTypeDelete(robj 
*o
, robj 
*key
) { 
6889     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6890         key 
= getDecodedObject(key
); 
6891         o
->ptr 
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
); 
6894         deleted 
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
; 
6895         /* Always check if the dictionary needs a resize after a delete. */ 
6896         if (deleted 
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
); 
6901 /* Return the number of elements in a hash. */ 
6902 static unsigned long hashTypeLength(robj 
*o
) { 
6903     return (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) ? 
6904         zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
); 
6907 /* Structure to hold hash iteration abstraction. Note that iteration over 
6908  * hashes involves both fields and values. Because it is possible that 
6909  * not both are required, store pointers in the iterator to avoid 
6910  * unnecessary memory allocation for fields/values. */ 
6914     unsigned char *zk
, *zv
; 
6915     unsigned int zklen
, zvlen
; 
6921 static hashTypeIterator 
*hashTypeInitIterator(robj 
*subject
) { 
6922     hashTypeIterator 
*hi 
= zmalloc(sizeof(hashTypeIterator
)); 
6923     hi
->encoding 
= subject
->encoding
; 
6924     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6925         hi
->zi 
= zipmapRewind(subject
->ptr
); 
6926     } else if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6927         hi
->di 
= dictGetIterator(subject
->ptr
); 
6934 static void hashTypeReleaseIterator(hashTypeIterator 
*hi
) { 
6935     if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6936         dictReleaseIterator(hi
->di
); 
6941 /* Move to the next entry in the hash. Return REDIS_OK when the next entry 
6942  * could be found and REDIS_ERR when the iterator reaches the end. */ 
6943 static int hashTypeNext(hashTypeIterator 
*hi
) { 
6944     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6945         if ((hi
->zi 
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
, 
6946             &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
; 
6948         if ((hi
->de 
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
; 
6953 /* Get key or value object at current iteration position. 
6954  * This increases the refcount of the field object by 1. */ 
6955 static robj 
*hashTypeCurrent(hashTypeIterator 
*hi
, int what
) { 
6957     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6958         if (what 
& REDIS_HASH_KEY
) { 
6959             o 
= createStringObject((char*)hi
->zk
,hi
->zklen
); 
6961             o 
= createStringObject((char*)hi
->zv
,hi
->zvlen
); 
6964         if (what 
& REDIS_HASH_KEY
) { 
6965             o 
= dictGetEntryKey(hi
->de
); 
6967             o 
= dictGetEntryVal(hi
->de
); 
6974 static robj 
*hashTypeLookupWriteOrCreate(redisClient 
*c
, robj 
*key
) { 
6975     robj 
*o 
= lookupKeyWrite(c
->db
,key
); 
6977         o 
= createHashObject(); 
6980         if (o
->type 
!= REDIS_HASH
) { 
6981             addReply(c
,shared
.wrongtypeerr
); 
6988 /* ============================= Hash commands ============================== */ 
6989 static void hsetCommand(redisClient 
*c
) { 
6993     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6994     hashTypeTryConversion(o
,c
->argv
,2,3); 
6995     hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
6996     update 
= hashTypeSet(o
,c
->argv
[2],c
->argv
[3]); 
6997     addReply(c
, update 
? shared
.czero 
: shared
.cone
); 
7001 static void hsetnxCommand(redisClient 
*c
) { 
7003     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7004     hashTypeTryConversion(o
,c
->argv
,2,3); 
7006     if (hashTypeExists(o
, c
->argv
[2])) { 
7007         addReply(c
, shared
.czero
); 
7009         hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
7010         hashTypeSet(o
,c
->argv
[2],c
->argv
[3]); 
7011         addReply(c
, shared
.cone
); 
7016 static void hmsetCommand(redisClient 
*c
) { 
7020     if ((c
->argc 
% 2) == 1) { 
7021         addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n")); 
7025     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7026     hashTypeTryConversion(o
,c
->argv
,2,c
->argc
-1); 
7027     for (i 
= 2; i 
< c
->argc
; i 
+= 2) { 
7028         hashTypeTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]); 
7029         hashTypeSet(o
,c
->argv
[i
],c
->argv
[i
+1]); 
7031     addReply(c
, shared
.ok
); 
7035 static void hincrbyCommand(redisClient 
*c
) { 
7036     long long value
, incr
; 
7037     robj 
*o
, *current
, *new; 
7039     if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return; 
7040     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7041     if ((current 
= hashTypeGet(o
,c
->argv
[2])) != NULL
) { 
7042         if (getLongLongFromObjectOrReply(c
,current
,&value
, 
7043             "hash value is not an integer") != REDIS_OK
) { 
7044             decrRefCount(current
); 
7047         decrRefCount(current
); 
7053     new = createStringObjectFromLongLong(value
); 
7054     hashTypeTryObjectEncoding(o
,&c
->argv
[2],NULL
); 
7055     hashTypeSet(o
,c
->argv
[2],new); 
7057     addReplyLongLong(c
,value
); 
7061 static void hgetCommand(redisClient 
*c
) { 
7063     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
7064         checkType(c
,o
,REDIS_HASH
)) return; 
7066     if ((value 
= hashTypeGet(o
,c
->argv
[2])) != NULL
) { 
7067         addReplyBulk(c
,value
); 
7068         decrRefCount(value
); 
7070         addReply(c
,shared
.nullbulk
); 
7074 static void hmgetCommand(redisClient 
*c
) { 
7077     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
7078     if (o 
!= NULL 
&& o
->type 
!= REDIS_HASH
) { 
7079         addReply(c
,shared
.wrongtypeerr
); 
7082     /* Note the check for o != NULL happens inside the loop. This is 
7083      * done because objects that cannot be found are considered to be 
7084      * an empty hash. The reply should then be a series of NULLs. */ 
7085     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2)); 
7086     for (i 
= 2; i 
< c
->argc
; i
++) { 
7087         if (o 
!= NULL 
&& (value 
= hashTypeGet(o
,c
->argv
[i
])) != NULL
) { 
7088             addReplyBulk(c
,value
); 
7089             decrRefCount(value
); 
7091             addReply(c
,shared
.nullbulk
); 
7096 static void hdelCommand(redisClient 
*c
) { 
7098     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7099         checkType(c
,o
,REDIS_HASH
)) return; 
7101     if (hashTypeDelete(o
,c
->argv
[2])) { 
7102         if (hashTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
7103         addReply(c
,shared
.cone
); 
7106         addReply(c
,shared
.czero
); 
7110 static void hlenCommand(redisClient 
*c
) { 
7112     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7113         checkType(c
,o
,REDIS_HASH
)) return; 
7115     addReplyUlong(c
,hashTypeLength(o
)); 
7118 static void genericHgetallCommand(redisClient 
*c
, int flags
) { 
7119     robj 
*o
, *lenobj
, *obj
; 
7120     unsigned long count 
= 0; 
7121     hashTypeIterator 
*hi
; 
7123     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
7124         || checkType(c
,o
,REDIS_HASH
)) return; 
7126     lenobj 
= createObject(REDIS_STRING
,NULL
); 
7128     decrRefCount(lenobj
); 
7130     hi 
= hashTypeInitIterator(o
); 
7131     while (hashTypeNext(hi
) != REDIS_ERR
) { 
7132         if (flags 
& REDIS_HASH_KEY
) { 
7133             obj 
= hashTypeCurrent(hi
,REDIS_HASH_KEY
); 
7134             addReplyBulk(c
,obj
); 
7138         if (flags 
& REDIS_HASH_VALUE
) { 
7139             obj 
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
); 
7140             addReplyBulk(c
,obj
); 
7145     hashTypeReleaseIterator(hi
); 
7147     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",count
); 
7150 static void hkeysCommand(redisClient 
*c
) { 
7151     genericHgetallCommand(c
,REDIS_HASH_KEY
); 
7154 static void hvalsCommand(redisClient 
*c
) { 
7155     genericHgetallCommand(c
,REDIS_HASH_VALUE
); 
7158 static void hgetallCommand(redisClient 
*c
) { 
7159     genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
); 
7162 static void hexistsCommand(redisClient 
*c
) { 
7164     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7165         checkType(c
,o
,REDIS_HASH
)) return; 
7167     addReply(c
, hashTypeExists(o
,c
->argv
[2]) ? shared
.cone 
: shared
.czero
); 
7170 static void convertToRealHash(robj 
*o
) { 
7171     unsigned char *key
, *val
, *p
, *zm 
= o
->ptr
; 
7172     unsigned int klen
, vlen
; 
7173     dict 
*dict 
= dictCreate(&hashDictType
,NULL
); 
7175     assert(o
->type 
== REDIS_HASH 
&& o
->encoding 
!= REDIS_ENCODING_HT
); 
7176     p 
= zipmapRewind(zm
); 
7177     while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
7178         robj 
*keyobj
, *valobj
; 
7180         keyobj 
= createStringObject((char*)key
,klen
); 
7181         valobj 
= createStringObject((char*)val
,vlen
); 
7182         keyobj 
= tryObjectEncoding(keyobj
); 
7183         valobj 
= tryObjectEncoding(valobj
); 
7184         dictAdd(dict
,keyobj
,valobj
); 
7186     o
->encoding 
= REDIS_ENCODING_HT
; 
7191 /* ========================= Non type-specific commands  ==================== */ 
7193 static void flushdbCommand(redisClient 
*c
) { 
7194     server
.dirty 
+= dictSize(c
->db
->dict
); 
7195     touchWatchedKeysOnFlush(c
->db
->id
); 
7196     dictEmpty(c
->db
->dict
); 
7197     dictEmpty(c
->db
->expires
); 
7198     addReply(c
,shared
.ok
); 
7201 static void flushallCommand(redisClient 
*c
) { 
7202     touchWatchedKeysOnFlush(-1); 
7203     server
.dirty 
+= emptyDb(); 
7204     addReply(c
,shared
.ok
); 
7205     if (server
.bgsavechildpid 
!= -1) { 
7206         kill(server
.bgsavechildpid
,SIGKILL
); 
7207         rdbRemoveTempFile(server
.bgsavechildpid
); 
7209     rdbSave(server
.dbfilename
); 
7213 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
7214     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
7216     so
->pattern 
= pattern
; 
7220 /* Return the value associated to the key with a name obtained 
7221  * substituting the first occurence of '*' in 'pattern' with 'subst'. 
7222  * The returned object will always have its refcount increased by 1 
7223  * when it is non-NULL. */ 
7224 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
7227     robj keyobj
, fieldobj
, *o
; 
7228     int prefixlen
, sublen
, postfixlen
, fieldlen
; 
7229     /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
7233         char buf
[REDIS_SORTKEY_MAX
+1]; 
7234     } keyname
, fieldname
; 
7236     /* If the pattern is "#" return the substitution object itself in order 
7237      * to implement the "SORT ... GET #" feature. */ 
7238     spat 
= pattern
->ptr
; 
7239     if (spat
[0] == '#' && spat
[1] == '\0') { 
7240         incrRefCount(subst
); 
7244     /* The substitution object may be specially encoded. If so we create 
7245      * a decoded object on the fly. Otherwise getDecodedObject will just 
7246      * increment the ref count, that we'll decrement later. */ 
7247     subst 
= getDecodedObject(subst
); 
7250     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
7251     p 
= strchr(spat
,'*'); 
7253         decrRefCount(subst
); 
7257     /* Find out if we're dealing with a hash dereference. */ 
7258     if ((f 
= strstr(p
+1, "->")) != NULL
) { 
7259         fieldlen 
= sdslen(spat
)-(f
-spat
); 
7260         /* this also copies \0 character */ 
7261         memcpy(fieldname
.buf
,f
+2,fieldlen
-1); 
7262         fieldname
.len 
= fieldlen
-2; 
7268     sublen 
= sdslen(ssub
); 
7269     postfixlen 
= sdslen(spat
)-(prefixlen
+1)-fieldlen
; 
7270     memcpy(keyname
.buf
,spat
,prefixlen
); 
7271     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
7272     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
7273     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
7274     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
7275     decrRefCount(subst
); 
7277     /* Lookup substituted key */ 
7278     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)); 
7279     o 
= lookupKeyRead(db
,&keyobj
); 
7280     if (o 
== NULL
) return NULL
; 
7283         if (o
->type 
!= REDIS_HASH 
|| fieldname
.len 
< 1) return NULL
; 
7285         /* Retrieve value from hash by the field name. This operation 
7286          * already increases the refcount of the returned object. */ 
7287         initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2)); 
7288         o 
= hashTypeGet(o
, &fieldobj
); 
7290         if (o
->type 
!= REDIS_STRING
) return NULL
; 
7292         /* Every object that this function returns needs to have its refcount 
7293          * increased. sortCommand decreases it again. */ 
7300 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
7301  * the additional parameter is not standard but a BSD-specific we have to 
7302  * pass sorting parameters via the global 'server' structure */ 
7303 static int sortCompare(const void *s1
, const void *s2
) { 
7304     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
7307     if (!server
.sort_alpha
) { 
7308         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
7309         if (so1
->u
.score 
> so2
->u
.score
) { 
7311         } else if (so1
->u
.score 
< so2
->u
.score
) { 
7317         /* Alphanumeric sorting */ 
7318         if (server
.sort_bypattern
) { 
7319             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
7320                 /* At least one compare object is NULL */ 
7321                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
7323                 else if (so1
->u
.cmpobj 
== NULL
) 
7328                 /* We have both the objects, use strcoll */ 
7329                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
7332             /* Compare elements directly. */ 
7333             cmp 
= compareStringObjects(so1
->obj
,so2
->obj
); 
7336     return server
.sort_desc 
? -cmp 
: cmp
; 
7339 /* The SORT command is the most complex command in Redis. Warning: this code 
7340  * is optimized for speed and a bit less for readability */ 
7341 static void sortCommand(redisClient 
*c
) { 
7343     unsigned int outputlen 
= 0; 
7344     int desc 
= 0, alpha 
= 0; 
7345     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
7346     int j
, dontsort 
= 0, vectorlen
; 
7347     int getop 
= 0; /* GET operation counter */ 
7348     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
7349     redisSortObject 
*vector
; /* Resulting vector to sort */ 
7351     /* Lookup the key to sort. It must be of the right types */ 
7352     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
7353     if (sortval 
== NULL
) { 
7354         addReply(c
,shared
.emptymultibulk
); 
7357     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
7358         sortval
->type 
!= REDIS_ZSET
) 
7360         addReply(c
,shared
.wrongtypeerr
); 
7364     /* Create a list of operations to perform for every sorted element. 
7365      * Operations can be GET/DEL/INCR/DECR */ 
7366     operations 
= listCreate(); 
7367     listSetFreeMethod(operations
,zfree
); 
7370     /* Now we need to protect sortval incrementing its count, in the future 
7371      * SORT may have options able to overwrite/delete keys during the sorting 
7372      * and the sorted key itself may get destroyed */ 
7373     incrRefCount(sortval
); 
7375     /* The SORT command has an SQL-alike syntax, parse it */ 
7376     while(j 
< c
->argc
) { 
7377         int leftargs 
= c
->argc
-j
-1; 
7378         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
7380         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
7382         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
7384         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
7385             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
7386             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
7388         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
7389             storekey 
= c
->argv
[j
+1]; 
7391         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
7392             sortby 
= c
->argv
[j
+1]; 
7393             /* If the BY pattern does not contain '*', i.e. it is constant, 
7394              * we don't need to sort nor to lookup the weight keys. */ 
7395             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
7397         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
7398             listAddNodeTail(operations
,createSortOperation( 
7399                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
7403             decrRefCount(sortval
); 
7404             listRelease(operations
); 
7405             addReply(c
,shared
.syntaxerr
); 
7411     /* Load the sorting vector with all the objects to sort */ 
7412     switch(sortval
->type
) { 
7413     case REDIS_LIST
: vectorlen 
= listTypeLength(sortval
); break; 
7414     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
7415     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
7416     default: vectorlen 
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */ 
7418     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
7421     if (sortval
->type 
== REDIS_LIST
) { 
7422         listTypeIterator 
*li 
= listTypeInitIterator(sortval
,0,REDIS_TAIL
); 
7423         listTypeEntry entry
; 
7424         while(listTypeNext(li
,&entry
)) { 
7425             vector
[j
].obj 
= listTypeGet(&entry
); 
7426             vector
[j
].u
.score 
= 0; 
7427             vector
[j
].u
.cmpobj 
= NULL
; 
7430         listTypeReleaseIterator(li
); 
7436         if (sortval
->type 
== REDIS_SET
) { 
7439             zset 
*zs 
= sortval
->ptr
; 
7443         di 
= dictGetIterator(set
); 
7444         while((setele 
= dictNext(di
)) != NULL
) { 
7445             vector
[j
].obj 
= dictGetEntryKey(setele
); 
7446             vector
[j
].u
.score 
= 0; 
7447             vector
[j
].u
.cmpobj 
= NULL
; 
7450         dictReleaseIterator(di
); 
7452     redisAssert(j 
== vectorlen
); 
7454     /* Now it's time to load the right scores in the sorting vector */ 
7455     if (dontsort 
== 0) { 
7456         for (j 
= 0; j 
< vectorlen
; j
++) { 
7459                 /* lookup value to sort by */ 
7460                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
7461                 if (!byval
) continue; 
7463                 /* use object itself to sort by */ 
7464                 byval 
= vector
[j
].obj
; 
7468                 if (sortby
) vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
7470                 if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
7471                     vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
7472                 } else if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
7473                     /* Don't need to decode the object if it's 
7474                      * integer-encoded (the only encoding supported) so 
7475                      * far. We can just cast it */ 
7476                     vector
[j
].u
.score 
= (long)byval
->ptr
; 
7478                     redisAssert(1 != 1); 
7482             /* when the object was retrieved using lookupKeyByPattern, 
7483              * its refcount needs to be decreased. */ 
7485                 decrRefCount(byval
); 
7490     /* We are ready to sort the vector... perform a bit of sanity check 
7491      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
7492     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
7493     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
7494     if (start 
>= vectorlen
) { 
7495         start 
= vectorlen
-1; 
7498     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
7500     if (dontsort 
== 0) { 
7501         server
.sort_desc 
= desc
; 
7502         server
.sort_alpha 
= alpha
; 
7503         server
.sort_bypattern 
= sortby 
? 1 : 0; 
7504         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
7505             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
7507             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
7510     /* Send command output to the output buffer, performing the specified 
7511      * GET/DEL/INCR/DECR operations if any. */ 
7512     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
7513     if (storekey 
== NULL
) { 
7514         /* STORE option not specified, sent the sorting result to client */ 
7515         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
7516         for (j 
= start
; j 
<= end
; j
++) { 
7520             if (!getop
) addReplyBulk(c
,vector
[j
].obj
); 
7521             listRewind(operations
,&li
); 
7522             while((ln 
= listNext(&li
))) { 
7523                 redisSortOperation 
*sop 
= ln
->value
; 
7524                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7527                 if (sop
->type 
== REDIS_SORT_GET
) { 
7529                         addReply(c
,shared
.nullbulk
); 
7531                         addReplyBulk(c
,val
); 
7535                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
7540         robj 
*sobj 
= createZiplistObject(); 
7542         /* STORE option specified, set the sorting result as a List object */ 
7543         for (j 
= start
; j 
<= end
; j
++) { 
7548                 listTypePush(sobj
,vector
[j
].obj
,REDIS_TAIL
); 
7550                 listRewind(operations
,&li
); 
7551                 while((ln 
= listNext(&li
))) { 
7552                     redisSortOperation 
*sop 
= ln
->value
; 
7553                     robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7556                     if (sop
->type 
== REDIS_SORT_GET
) { 
7557                         if (!val
) val 
= createStringObject("",0); 
7559                         /* listTypePush does an incrRefCount, so we should take care 
7560                          * care of the incremented refcount caused by either 
7561                          * lookupKeyByPattern or createStringObject("",0) */ 
7562                         listTypePush(sobj
,val
,REDIS_TAIL
); 
7566                         redisAssert(sop
->type 
== REDIS_SORT_GET
); 
7571         dbReplace(c
->db
,storekey
,sobj
); 
7572         /* Note: we add 1 because the DB is dirty anyway since even if the 
7573          * SORT result is empty a new key is set and maybe the old content 
7575         server
.dirty 
+= 1+outputlen
; 
7576         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
7580     if (sortval
->type 
== REDIS_LIST
) 
7581         for (j 
= 0; j 
< vectorlen
; j
++) 
7582             decrRefCount(vector
[j
].obj
); 
7583     decrRefCount(sortval
); 
7584     listRelease(operations
); 
7585     for (j 
= 0; j 
< vectorlen
; j
++) { 
7586         if (alpha 
&& vector
[j
].u
.cmpobj
) 
7587             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer: the function always writes a
 * NUL-terminated string into it and never emits more than 32 bytes, so
 * the caller must provide at least that much room.
 * 'n' is the amount of bytes to format.
 *
 * Values below 1024 are printed as an exact integer followed by 'B',
 * larger values are printed with two decimals and a K/M/G/T/P suffix
 * (powers of 1024). Anything too large for the 'P' unit falls back to
 * the exact number of bytes, so the buffer is never left untouched. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024ULL*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024ULL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024ULL*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024ULL*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024ULL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Out of units: report the raw amount instead of leaving the
         * caller with an uninitialized buffer. */
        sprintf(s,"%lluB",n);
    }
}
7613 /* Create the string returned by the INFO command. This is decoupled 
7614  * by the INFO command itself as we need to report the same information 
7615  * on memory corruption problems. */ 
7616 static sds 
genRedisInfoString(void) { 
7618     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
7622     bytesToHuman(hmem
,zmalloc_used_memory()); 
7623     info 
= sdscatprintf(sdsempty(), 
7624         "redis_version:%s\r\n" 
7625         "redis_git_sha1:%s\r\n" 
7626         "redis_git_dirty:%d\r\n" 
7628         "multiplexing_api:%s\r\n" 
7629         "process_id:%ld\r\n" 
7630         "uptime_in_seconds:%ld\r\n" 
7631         "uptime_in_days:%ld\r\n" 
7632         "connected_clients:%d\r\n" 
7633         "connected_slaves:%d\r\n" 
7634         "blocked_clients:%d\r\n" 
7635         "used_memory:%zu\r\n" 
7636         "used_memory_human:%s\r\n" 
7637         "changes_since_last_save:%lld\r\n" 
7638         "bgsave_in_progress:%d\r\n" 
7639         "last_save_time:%ld\r\n" 
7640         "bgrewriteaof_in_progress:%d\r\n" 
7641         "total_connections_received:%lld\r\n" 
7642         "total_commands_processed:%lld\r\n" 
7643         "expired_keys:%lld\r\n" 
7644         "hash_max_zipmap_entries:%zu\r\n" 
7645         "hash_max_zipmap_value:%zu\r\n" 
7646         "pubsub_channels:%ld\r\n" 
7647         "pubsub_patterns:%u\r\n" 
7652         strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0, 
7653         (sizeof(long) == 8) ? "64" : "32", 
7658         listLength(server
.clients
)-listLength(server
.slaves
), 
7659         listLength(server
.slaves
), 
7660         server
.blpop_blocked_clients
, 
7661         zmalloc_used_memory(), 
7664         server
.bgsavechildpid 
!= -1, 
7666         server
.bgrewritechildpid 
!= -1, 
7667         server
.stat_numconnections
, 
7668         server
.stat_numcommands
, 
7669         server
.stat_expiredkeys
, 
7670         server
.hash_max_zipmap_entries
, 
7671         server
.hash_max_zipmap_value
, 
7672         dictSize(server
.pubsub_channels
), 
7673         listLength(server
.pubsub_patterns
), 
7674         server
.vm_enabled 
!= 0, 
7675         server
.masterhost 
== NULL 
? "master" : "slave" 
7677     if (server
.masterhost
) { 
7678         info 
= sdscatprintf(info
, 
7679             "master_host:%s\r\n" 
7680             "master_port:%d\r\n" 
7681             "master_link_status:%s\r\n" 
7682             "master_last_io_seconds_ago:%d\r\n" 
7685             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
7687             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
7690     if (server
.vm_enabled
) { 
7692         info 
= sdscatprintf(info
, 
7693             "vm_conf_max_memory:%llu\r\n" 
7694             "vm_conf_page_size:%llu\r\n" 
7695             "vm_conf_pages:%llu\r\n" 
7696             "vm_stats_used_pages:%llu\r\n" 
7697             "vm_stats_swapped_objects:%llu\r\n" 
7698             "vm_stats_swappin_count:%llu\r\n" 
7699             "vm_stats_swappout_count:%llu\r\n" 
7700             "vm_stats_io_newjobs_len:%lu\r\n" 
7701             "vm_stats_io_processing_len:%lu\r\n" 
7702             "vm_stats_io_processed_len:%lu\r\n" 
7703             "vm_stats_io_active_threads:%lu\r\n" 
7704             "vm_stats_blocked_clients:%lu\r\n" 
7705             ,(unsigned long long) server
.vm_max_memory
, 
7706             (unsigned long long) server
.vm_page_size
, 
7707             (unsigned long long) server
.vm_pages
, 
7708             (unsigned long long) server
.vm_stats_used_pages
, 
7709             (unsigned long long) server
.vm_stats_swapped_objects
, 
7710             (unsigned long long) server
.vm_stats_swapins
, 
7711             (unsigned long long) server
.vm_stats_swapouts
, 
7712             (unsigned long) listLength(server
.io_newjobs
), 
7713             (unsigned long) listLength(server
.io_processing
), 
7714             (unsigned long) listLength(server
.io_processed
), 
7715             (unsigned long) server
.io_active_threads
, 
7716             (unsigned long) server
.vm_blocked_clients
 
7720     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7721         long long keys
, vkeys
; 
7723         keys 
= dictSize(server
.db
[j
].dict
); 
7724         vkeys 
= dictSize(server
.db
[j
].expires
); 
7725         if (keys 
|| vkeys
) { 
7726             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
7733 static void infoCommand(redisClient 
*c
) { 
7734     sds info 
= genRedisInfoString(); 
7735     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
7736         (unsigned long)sdslen(info
))); 
7737     addReplySds(c
,info
); 
7738     addReply(c
,shared
.crlf
); 
7741 static void monitorCommand(redisClient 
*c
) { 
7742     /* ignore MONITOR if aleady slave or in monitor mode */ 
7743     if (c
->flags 
& REDIS_SLAVE
) return; 
7745     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
7747     listAddNodeTail(server
.monitors
,c
); 
7748     addReply(c
,shared
.ok
); 
7751 /* ================================= Expire ================================= */ 
7752 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
7753     if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) { 
7760 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
7761     sds copy 
= sdsdup(key
->ptr
); 
7762     if (dictAdd(db
->expires
,copy
,(void*)when
) == DICT_ERR
) { 
7770 /* Return the expire time of the specified key, or -1 if no expire 
7771  * is associated with this key (i.e. the key is non volatile) */ 
7772 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
7775     /* No expire? return ASAP */ 
7776     if (dictSize(db
->expires
) == 0 || 
7777        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1; 
7779     return (time_t) dictGetEntryVal(de
); 
7782 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
7786     /* No expire? return ASAP */ 
7787     if (dictSize(db
->expires
) == 0 || 
7788        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0; 
7790     /* Lookup the expire */ 
7791     when 
= (time_t) dictGetEntryVal(de
); 
7792     if (time(NULL
) <= when
) return 0; 
7794     /* Delete the key */ 
7796     server
.stat_expiredkeys
++; 
7800 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
7803     /* No expire? return ASAP */ 
7804     if (dictSize(db
->expires
) == 0 || 
7805        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0; 
7807     /* Delete the key */ 
7809     server
.stat_expiredkeys
++; 
7810     dictDelete(db
->expires
,key
->ptr
); 
7811     return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
; 
7814 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*param
, long offset
) { 
7818     if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return; 
7822     de 
= dictFind(c
->db
->dict
,key
->ptr
); 
7824         addReply(c
,shared
.czero
); 
7828         if (dbDelete(c
->db
,key
)) server
.dirty
++; 
7829         addReply(c
, shared
.cone
); 
7832         time_t when 
= time(NULL
)+seconds
; 
7833         if (setExpire(c
->db
,key
,when
)) { 
7834             addReply(c
,shared
.cone
); 
7837             addReply(c
,shared
.czero
); 
7843 static void expireCommand(redisClient 
*c
) { 
7844     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0); 
7847 static void expireatCommand(redisClient 
*c
) { 
7848     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
)); 
7851 static void ttlCommand(redisClient 
*c
) { 
7855     expire 
= getExpire(c
->db
,c
->argv
[1]); 
7857         ttl 
= (int) (expire
-time(NULL
)); 
7858         if (ttl 
< 0) ttl 
= -1; 
7860     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
7863 /* ================================ MULTI/EXEC ============================== */ 
7865 /* Client state initialization for MULTI/EXEC */ 
7866 static void initClientMultiState(redisClient 
*c
) { 
7867     c
->mstate
.commands 
= NULL
; 
7868     c
->mstate
.count 
= 0; 
7871 /* Release all the resources associated with MULTI/EXEC state */ 
7872 static void freeClientMultiState(redisClient 
*c
) { 
7875     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7877         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
7879         for (i 
= 0; i 
< mc
->argc
; i
++) 
7880             decrRefCount(mc
->argv
[i
]); 
7883     zfree(c
->mstate
.commands
); 
7886 /* Add a new command into the MULTI commands queue */ 
7887 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
7891     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
7892             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
7893     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
7896     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
7897     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
7898     for (j 
= 0; j 
< c
->argc
; j
++) 
7899         incrRefCount(mc
->argv
[j
]); 
7903 static void multiCommand(redisClient 
*c
) { 
7904     if (c
->flags 
& REDIS_MULTI
) { 
7905         addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n")); 
7908     c
->flags 
|= REDIS_MULTI
; 
7909     addReply(c
,shared
.ok
); 
7912 static void discardCommand(redisClient 
*c
) { 
7913     if (!(c
->flags 
& REDIS_MULTI
)) { 
7914         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
7918     freeClientMultiState(c
); 
7919     initClientMultiState(c
); 
7920     c
->flags 
&= (~REDIS_MULTI
); 
7921     addReply(c
,shared
.ok
); 
7924 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand 
7925  * implememntation for more information. */ 
7926 static void execCommandReplicateMulti(redisClient 
*c
) { 
7927     struct redisCommand 
*cmd
; 
7928     robj 
*multistring 
= createStringObject("MULTI",5); 
7930     cmd 
= lookupCommand("multi"); 
7931     if (server
.appendonly
) 
7932         feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1); 
7933     if (listLength(server
.slaves
)) 
7934         replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1); 
7935     decrRefCount(multistring
); 
7938 static void execCommand(redisClient 
*c
) { 
7943     if (!(c
->flags 
& REDIS_MULTI
)) { 
7944         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
7948     /* Check if we need to abort the EXEC if some WATCHed key was touched. 
7949      * A failed EXEC will return a multi bulk nil object. */ 
7950     if (c
->flags 
& REDIS_DIRTY_CAS
) { 
7951         freeClientMultiState(c
); 
7952         initClientMultiState(c
); 
7953         c
->flags 
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
); 
7955         addReply(c
,shared
.nullmultibulk
); 
7959     /* Replicate a MULTI request now that we are sure the block is executed. 
7960      * This way we'll deliver the MULTI/..../EXEC block as a whole and 
7961      * both the AOF and the replication link will have the same consistency 
7962      * and atomicity guarantees. */ 
7963     execCommandReplicateMulti(c
); 
7965     /* Exec all the queued commands */ 
7966     unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */ 
7967     orig_argv 
= c
->argv
; 
7968     orig_argc 
= c
->argc
; 
7969     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
7970     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7971         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
7972         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
7973         call(c
,c
->mstate
.commands
[j
].cmd
); 
7975     c
->argv 
= orig_argv
; 
7976     c
->argc 
= orig_argc
; 
7977     freeClientMultiState(c
); 
7978     initClientMultiState(c
); 
7979     c
->flags 
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
); 
7980     /* Make sure the EXEC command is always replicated / AOF, since we 
7981      * always send the MULTI command (we can't know beforehand if the 
7982      * next operations will contain at least a modification to the DB). */ 
7986 /* =========================== Blocking Operations  ========================= */ 
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * so specific that it will require a rewrite in order to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already
 * be used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are
 * implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as an example:
 * - If the user calls BLPOP and the key exists and contains a non empty
 *   list then LPOP is called instead. So BLPOP is semantically the same as
 *   LPOP when there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification
 *   for new data to read in the client socket (so that we'll not serve new
 *   requests while the blocking request is not served). We also put the
 *   client in a dictionary (db->blocking_keys) mapping every key to the
 *   list of clients blocked on that key.
 * - If a PUSH operation is performed against a key with blocked clients
 *   waiting, we serve the first client in the list: basically instead of
 *   pushing the new element into the list we return it to the
 *   (first / oldest) blocked client, unblock the client, and remove it
 *   from the list.
 *
 * The above comment and the source code should be enough to understand
 * the implementation and modify / fix it later.
 */
8017 /* Set a client in blocking mode for the specified key, with the specified 
8019 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
8024     c
->blocking_keys 
= zmalloc(sizeof(robj
*)*numkeys
); 
8025     c
->blocking_keys_num 
= numkeys
; 
8026     c
->blockingto 
= timeout
; 
8027     for (j 
= 0; j 
< numkeys
; j
++) { 
8028         /* Add the key in the client structure, to map clients -> keys */ 
8029         c
->blocking_keys
[j
] = keys
[j
]; 
8030         incrRefCount(keys
[j
]); 
8032         /* And in the other "side", to map keys -> clients */ 
8033         de 
= dictFind(c
->db
->blocking_keys
,keys
[j
]); 
8037             /* For every key we take a list of clients blocked for it */ 
8039             retval 
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
); 
8040             incrRefCount(keys
[j
]); 
8041             assert(retval 
== DICT_OK
); 
8043             l 
= dictGetEntryVal(de
); 
8045         listAddNodeTail(l
,c
); 
8047     /* Mark the client as a blocked client */ 
8048     c
->flags 
|= REDIS_BLOCKED
; 
8049     server
.blpop_blocked_clients
++; 
8052 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
8053 static void unblockClientWaitingData(redisClient 
*c
) { 
8058     assert(c
->blocking_keys 
!= NULL
); 
8059     /* The client may wait for multiple keys, so unblock it for every key. */ 
8060     for (j 
= 0; j 
< c
->blocking_keys_num
; j
++) { 
8061         /* Remove this client from the list of clients waiting for this key. */ 
8062         de 
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]); 
8064         l 
= dictGetEntryVal(de
); 
8065         listDelNode(l
,listSearchKey(l
,c
)); 
8066         /* If the list is empty we need to remove it to avoid wasting memory */ 
8067         if (listLength(l
) == 0) 
8068             dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]); 
8069         decrRefCount(c
->blocking_keys
[j
]); 
8071     /* Cleanup the client structure */ 
8072     zfree(c
->blocking_keys
); 
8073     c
->blocking_keys 
= NULL
; 
8074     c
->flags 
&= (~REDIS_BLOCKED
); 
8075     server
.blpop_blocked_clients
--; 
8076     /* We want to process data if there is some command waiting 
8077      * in the input buffer. Note that this is safe even if 
8078      * unblockClientWaitingData() gets called from freeClient() because 
8079      * freeClient() will be smart enough to call this function 
8080      * *after* c->querybuf was set to NULL. */ 
8081     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
8084 /* This should be called from any function PUSHing into lists. 
8085  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
8086  * 'ele' is the element pushed. 
8088  * If the function returns 0 there was no client waiting for a list push 
8091  * If the function returns 1 there was a client waiting for a list push 
8092  * against this key, the element was passed to this client thus it's not 
8093  * needed to actually add it to the list and the caller should return asap. */ 
8094 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
8095     struct dictEntry 
*de
; 
8096     redisClient 
*receiver
; 
8100     de 
= dictFind(c
->db
->blocking_keys
,key
); 
8101     if (de 
== NULL
) return 0; 
8102     l 
= dictGetEntryVal(de
); 
8105     receiver 
= ln
->value
; 
8107     addReplySds(receiver
,sdsnew("*2\r\n")); 
8108     addReplyBulk(receiver
,key
); 
8109     addReplyBulk(receiver
,ele
); 
8110     unblockClientWaitingData(receiver
); 
8114 /* Blocking RPOP/LPOP */ 
8115 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
8120     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
8121         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
8123             if (o
->type 
!= REDIS_LIST
) { 
8124                 addReply(c
,shared
.wrongtypeerr
); 
8127                 list 
*list 
= o
->ptr
; 
8128                 if (listLength(list
) != 0) { 
8129                     /* If the list contains elements fall back to the usual 
8130                      * non-blocking POP operation */ 
8131                     robj 
*argv
[2], **orig_argv
; 
8134                     /* We need to alter the command arguments before to call 
8135                      * popGenericCommand() as the command takes a single key. */ 
8136                     orig_argv 
= c
->argv
; 
8137                     orig_argc 
= c
->argc
; 
8138                     argv
[1] = c
->argv
[j
]; 
8142                     /* Also the return value is different, we need to output 
8143                      * the multi bulk reply header and the key name. The 
8144                      * "real" command will add the last element (the value) 
8145                      * for us. If this souds like an hack to you it's just 
8146                      * because it is... */ 
8147                     addReplySds(c
,sdsnew("*2\r\n")); 
8148                     addReplyBulk(c
,argv
[1]); 
8149                     popGenericCommand(c
,where
); 
8151                     /* Fix the client structure with the original stuff */ 
8152                     c
->argv 
= orig_argv
; 
8153                     c
->argc 
= orig_argc
; 
8159     /* If the list is empty or the key does not exists we must block */ 
8160     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
8161     if (timeout 
> 0) timeout 
+= time(NULL
); 
8162     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
8165 static void blpopCommand(redisClient 
*c
) { 
8166     blockingPopGenericCommand(c
,REDIS_HEAD
); 
8169 static void brpopCommand(redisClient 
*c
) { 
8170     blockingPopGenericCommand(c
,REDIS_TAIL
); 
8173 /* =============================== Replication  ============================= */ 
8175 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8176     ssize_t nwritten
, ret 
= size
; 
8177     time_t start 
= time(NULL
); 
8181         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
8182             nwritten 
= write(fd
,ptr
,size
); 
8183             if (nwritten 
== -1) return -1; 
8187         if ((time(NULL
)-start
) > timeout
) { 
8195 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8196     ssize_t nread
, totread 
= 0; 
8197     time_t start 
= time(NULL
); 
8201         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
8202             nread 
= read(fd
,ptr
,size
); 
8203             if (nread 
== -1) return -1; 
8208         if ((time(NULL
)-start
) > timeout
) { 
/* Read a line from 'fd' into the buffer 'ptr' of 'size' bytes, one
 * character at a time, using syncRead() with the given timeout for every
 * character.
 *
 * Reading stops at the first newline, that is not stored, or when the
 * buffer is full: one byte is always reserved for the terminator, so at
 * most size-1 characters are stored and the result is always a
 * NUL-terminated string (nothing is written at all if 'size' is not
 * positive). A carriage return right before the newline is stripped from
 * the string.
 *
 * Return -1 if syncRead() fails, otherwise the number of characters read
 * before the newline (a stripped carriage return is still counted). */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return 0;
    size--; /* Room for the terminator. */
    *ptr = '\0';
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        /* Account for the stored character, otherwise a line longer than
         * the buffer would be written past its end. */
        size--;
    }
    return nread;
}
8237 static void syncCommand(redisClient 
*c
) { 
8238     /* ignore SYNC if aleady slave or in monitor mode */ 
8239     if (c
->flags 
& REDIS_SLAVE
) return; 
8241     /* SYNC can't be issued when the server has pending data to send to 
8242      * the client about already issued commands. We need a fresh reply 
8243      * buffer registering the differences between the BGSAVE and the current 
8244      * dataset, so that we can copy to other slaves if needed. */ 
8245     if (listLength(c
->reply
) != 0) { 
8246         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
8250     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
8251     /* Here we need to check if there is a background saving operation 
8252      * in progress, or if it is required to start one */ 
8253     if (server
.bgsavechildpid 
!= -1) { 
8254         /* Ok a background save is in progress. Let's check if it is a good 
8255          * one for replication, i.e. if there is another slave that is 
8256          * registering differences since the server forked to save */ 
8261         listRewind(server
.slaves
,&li
); 
8262         while((ln 
= listNext(&li
))) { 
8264             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
8267             /* Perfect, the server is already registering differences for 
8268              * another slave. Set the right state, and copy the buffer. */ 
8269             listRelease(c
->reply
); 
8270             c
->reply 
= listDup(slave
->reply
); 
8271             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8272             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
8274             /* No way, we need to wait for the next BGSAVE in order to 
8275              * register differences */ 
8276             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8277             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
8280         /* Ok we don't have a BGSAVE in progress, let's start one */ 
8281         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
8282         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
8283             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
8284             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
8287         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8290     c
->flags 
|= REDIS_SLAVE
; 
8292     listAddNodeTail(server
.slaves
,c
); 
8296 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
8297     redisClient 
*slave 
= privdata
; 
8299     REDIS_NOTUSED(mask
); 
8300     char buf
[REDIS_IOBUF_LEN
]; 
8301     ssize_t nwritten
, buflen
; 
8303     if (slave
->repldboff 
== 0) { 
8304         /* Write the bulk write count before to transfer the DB. In theory here 
8305          * we don't know how much room there is in the output buffer of the 
8306          * socket, but in pratice SO_SNDLOWAT (the minimum count for output 
8307          * operations) will never be smaller than the few bytes we need. */ 
8310         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
8312         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
8320     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
8321     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
8323         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
8324             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
8328     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
8329         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
8334     slave
->repldboff 
+= nwritten
; 
8335     if (slave
->repldboff 
== slave
->repldbsize
) { 
8336         close(slave
->repldbfd
); 
8337         slave
->repldbfd 
= -1; 
8338         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
8339         slave
->replstate 
= REDIS_REPL_ONLINE
; 
8340         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
8341             sendReplyToClient
, slave
) == AE_ERR
) { 
8345         addReplySds(slave
,sdsempty()); 
8346         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
8350 /* This function is called at the end of every backgrond saving. 
8351  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
8352  * otherwise REDIS_ERR is passed to the function. 
8354  * The goal of this function is to handle slaves waiting for a successful 
8355  * background saving in order to perform non-blocking synchronization. */ 
8356 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
8358     int startbgsave 
= 0; 
8361     listRewind(server
.slaves
,&li
); 
8362     while((ln 
= listNext(&li
))) { 
8363         redisClient 
*slave 
= ln
->value
; 
8365         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
8367             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8368         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
8369             struct redis_stat buf
; 
8371             if (bgsaveerr 
!= REDIS_OK
) { 
8373                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
8376             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
8377                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
8379                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
8382             slave
->repldboff 
= 0; 
8383             slave
->repldbsize 
= buf
.st_size
; 
8384             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
8385             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
8386             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
8393         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
8396             listRewind(server
.slaves
,&li
); 
8397             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
8398             while((ln 
= listNext(&li
))) { 
8399                 redisClient 
*slave 
= ln
->value
; 
8401                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
8408 static int syncWithMaster(void) { 
8409     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
8411     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
8412     int dfd
, maxtries 
= 5; 
8415         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
8420     /* AUTH with the master if required. */ 
8421     if(server
.masterauth
) { 
8422         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
8423         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
8425             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
8429         /* Read the AUTH result.  */ 
8430         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
8432             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
8436         if (buf
[0] != '+') { 
8438             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
8443     /* Issue the SYNC command */ 
8444     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
8446         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
8450     /* Read the bulk write count */ 
8451     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
8453         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
8457     if (buf
[0] != '$') { 
8459         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
8462     dumpsize 
= strtol(buf
+1,NULL
,10); 
8463     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
8464     /* Read the bulk write data on a temp file */ 
8466         snprintf(tmpfile
,256, 
8467             "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid()); 
8468         dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644); 
8469         if (dfd 
!= -1) break; 
8474         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
8478         int nread
, nwritten
; 
8480         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
8482             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
8488         nwritten 
= write(dfd
,buf
,nread
); 
8489         if (nwritten 
== -1) { 
8490             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
8498     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
8499         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
8505     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
8506         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
8510     server
.master 
= createClient(fd
); 
8511     server
.master
->flags 
|= REDIS_MASTER
; 
8512     server
.master
->authenticated 
= 1; 
8513     server
.replstate 
= REDIS_REPL_CONNECTED
; 
8517 static void slaveofCommand(redisClient 
*c
) { 
8518     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
8519         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
8520         if (server
.masterhost
) { 
8521             sdsfree(server
.masterhost
); 
8522             server
.masterhost 
= NULL
; 
8523             if (server
.master
) freeClient(server
.master
); 
8524             server
.replstate 
= REDIS_REPL_NONE
; 
8525             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
8528         sdsfree(server
.masterhost
); 
8529         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
8530         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
8531         if (server
.master
) freeClient(server
.master
); 
8532         server
.replstate 
= REDIS_REPL_CONNECT
; 
8533         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
8534             server
.masterhost
, server
.masterport
); 
8536     addReply(c
,shared
.ok
); 
8539 /* ============================ Maxmemory directive  ======================== */ 
8541 /* Try to free one object form the pre-allocated objects free list. 
8542  * This is useful under low mem conditions as by default we take 1 million 
8543  * free objects allocated. On success REDIS_OK is returned, otherwise 
8545 static int tryFreeOneObjectFromFreelist(void) { 
8548     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
8549     if (listLength(server
.objfreelist
)) { 
8550         listNode 
*head 
= listFirst(server
.objfreelist
); 
8551         o 
= listNodeValue(head
); 
8552         listDelNode(server
.objfreelist
,head
); 
8553         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8557         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8562 /* This function gets called when 'maxmemory' is set on the config file to limit 
8563  * the max memory used by the server, and we are out of memory. 
8564  * This function will try to, in order: 
8566  * - Free objects from the free list 
8567  * - Try to remove keys with an EXPIRE set 
8569  * It is not possible to free enough memory to reach used-memory < maxmemory 
8570  * the server will start refusing commands that will enlarge even more the 
8573 static void freeMemoryIfNeeded(void) { 
8574     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
8575         int j
, k
, freed 
= 0; 
8577         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
8578         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8580             robj 
*minkey 
= NULL
; 
8581             struct dictEntry 
*de
; 
8583             if (dictSize(server
.db
[j
].expires
)) { 
8585                 /* From a sample of three keys drop the one nearest to 
8586                  * the natural expire */ 
8587                 for (k 
= 0; k 
< 3; k
++) { 
8590                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
8591                     t 
= (time_t) dictGetEntryVal(de
); 
8592                     if (minttl 
== -1 || t 
< minttl
) { 
8593                         minkey 
= dictGetEntryKey(de
); 
8597                 dbDelete(server
.db
+j
,minkey
); 
8600         if (!freed
) return; /* nothing to free... */ 
8604 /* ============================== Append Only file ========================== */ 
8606 /* Called when the user switches from "appendonly yes" to "appendonly no" 
8607  * at runtime using the CONFIG command. */ 
8608 static void stopAppendOnly(void) { 
8609     flushAppendOnlyFile(); 
8610     aof_fsync(server
.appendfd
); 
8611     close(server
.appendfd
); 
8613     server
.appendfd 
= -1; 
8614     server
.appendseldb 
= -1; 
8615     server
.appendonly 
= 0; 
8616     /* rewrite operation in progress? kill it, wait child exit */ 
8617     if (server
.bgsavechildpid 
!= -1) { 
8620         if (kill(server
.bgsavechildpid
,SIGKILL
) != -1) 
8621             wait3(&statloc
,0,NULL
); 
8622         /* reset the buffer accumulating changes while the child saves */ 
8623         sdsfree(server
.bgrewritebuf
); 
8624         server
.bgrewritebuf 
= sdsempty(); 
8625         server
.bgsavechildpid 
= -1; 
8629 /* Called when the user switches from "appendonly no" to "appendonly yes" 
8630  * at runtime using the CONFIG command. */ 
8631 static int startAppendOnly(void) { 
8632     server
.appendonly 
= 1; 
8633     server
.lastfsync 
= time(NULL
); 
8634     server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
8635     if (server
.appendfd 
== -1) { 
8636         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
)); 
8639     if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) { 
8640         server
.appendonly 
= 0; 
8641         close(server
.appendfd
); 
8642         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
)); 
8648 /* Write the append only file buffer on disk. 
8650  * Since we are required to write the AOF before replying to the client, 
8651  * and the only way the client socket can get a write is entering when the 
8652  * the event loop, we accumulate all the AOF writes in a memory 
8653  * buffer and write it on disk using this function just before entering 
8654  * the event loop again. */ 
8655 static void flushAppendOnlyFile(void) { 
8659     if (sdslen(server
.aofbuf
) == 0) return; 
8661     /* We want to perform a single write. This should be guaranteed atomic 
8662      * at least if the filesystem we are writing is a real physical one. 
8663      * While this will save us against the server being killed I don't think 
8664      * there is much to do about the whole server stopping for power problems 
8666      nwritten 
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
)); 
8667      if (nwritten 
!= (signed)sdslen(server
.aofbuf
)) { 
8668         /* Ooops, we are in troubles. The best thing to do for now is 
8669          * aborting instead of giving the illusion that everything is 
8670          * working as expected. */ 
8671          if (nwritten 
== -1) { 
8672             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
8674             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
8678     sdsfree(server
.aofbuf
); 
8679     server
.aofbuf 
= sdsempty(); 
8681     /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have 
8682      * childs performing heavy I/O on disk. */ 
8683     if (server
.no_appendfsync_on_rewrite 
&& 
8684         (server
.bgrewritechildpid 
!= -1 || server
.bgsavechildpid 
!= -1)) 
8686     /* Fsync if needed */ 
8688     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
8689         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
8690          now
-server
.lastfsync 
> 1)) 
8692         /* aof_fsync is defined as fdatasync() for Linux in order to avoid 
8693          * flushing metadata. */ 
8694         aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
8695         server
.lastfsync 
= now
; 
8699 static sds 
catAppendOnlyGenericCommand(sds buf
, int argc
, robj 
**argv
) { 
8701     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
8702     for (j 
= 0; j 
< argc
; j
++) { 
8703         robj 
*o 
= getDecodedObject(argv
[j
]); 
8704         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
8705         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
8706         buf 
= sdscatlen(buf
,"\r\n",2); 
8712 static sds 
catAppendOnlyExpireAtCommand(sds buf
, robj 
*key
, robj 
*seconds
) { 
8717     /* Make sure we can use strtol */ 
8718     seconds 
= getDecodedObject(seconds
); 
8719     when 
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10); 
8720     decrRefCount(seconds
); 
8722     argv
[0] = createStringObject("EXPIREAT",8); 
8724     argv
[2] = createObject(REDIS_STRING
, 
8725         sdscatprintf(sdsempty(),"%ld",when
)); 
8726     buf 
= catAppendOnlyGenericCommand(buf
, argc
, argv
); 
8727     decrRefCount(argv
[0]); 
8728     decrRefCount(argv
[2]); 
8732 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
8733     sds buf 
= sdsempty(); 
8736     /* The DB this command was targetting is not the same as the last command 
8737      * we appendend. To issue a SELECT command is needed. */ 
8738     if (dictid 
!= server
.appendseldb
) { 
8741         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
8742         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
8743             (unsigned long)strlen(seldb
),seldb
); 
8744         server
.appendseldb 
= dictid
; 
8747     if (cmd
->proc 
== expireCommand
) { 
8748         /* Translate EXPIRE into EXPIREAT */ 
8749         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8750     } else if (cmd
->proc 
== setexCommand
) { 
8751         /* Translate SETEX to SET and EXPIREAT */ 
8752         tmpargv
[0] = createStringObject("SET",3); 
8753         tmpargv
[1] = argv
[1]; 
8754         tmpargv
[2] = argv
[3]; 
8755         buf 
= catAppendOnlyGenericCommand(buf
,3,tmpargv
); 
8756         decrRefCount(tmpargv
[0]); 
8757         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8759         buf 
= catAppendOnlyGenericCommand(buf
,argc
,argv
); 
8762     /* Append to the AOF buffer. This will be flushed on disk just before 
8763      * of re-entering the event loop, so before the client will get a 
8764      * positive reply about the operation performed. */ 
8765     server
.aofbuf 
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
)); 
8767     /* If a background append only file rewriting is in progress we want to 
8768      * accumulate the differences between the child DB and the current one 
8769      * in a buffer, so that when the child process will do its work we 
8770      * can append the differences to the new append only file. */ 
8771     if (server
.bgrewritechildpid 
!= -1) 
8772         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
8777 /* In Redis commands are always executed in the context of a client, so in 
8778  * order to load the append only file we need to create a fake client. */ 
8779 static struct redisClient 
*createFakeClient(void) { 
8780     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
8784     c
->querybuf 
= sdsempty(); 
8788     /* We set the fake client as a slave waiting for the synchronization 
8789      * so that Redis will not try to send replies to this client. */ 
8790     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8791     c
->reply 
= listCreate(); 
8792     listSetFreeMethod(c
->reply
,decrRefCount
); 
8793     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
8794     initClientMultiState(c
); 
8798 static void freeFakeClient(struct redisClient 
*c
) { 
8799     sdsfree(c
->querybuf
); 
8800     listRelease(c
->reply
); 
8801     freeClientMultiState(c
); 
8805 /* Replay the append log file. On error REDIS_OK is returned. On non fatal 
8806  * error (the append only file is zero-length) REDIS_ERR is returned. On 
8807  * fatal error an error message is logged and the program exists. */ 
8808 int loadAppendOnlyFile(char *filename
) { 
8809     struct redisClient 
*fakeClient
; 
8810     FILE *fp 
= fopen(filename
,"r"); 
8811     struct redis_stat sb
; 
8812     int appendonly 
= server
.appendonly
; 
8814     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
8818         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
8822     /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI 
8823      * to the same file we're about to read. */ 
8824     server
.appendonly 
= 0; 
8826     fakeClient 
= createFakeClient(); 
8833         struct redisCommand 
*cmd
; 
8836         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
8842         if (buf
[0] != '*') goto fmterr
; 
8844         argv 
= zmalloc(sizeof(robj
*)*argc
); 
8845         for (j 
= 0; j 
< argc
; j
++) { 
8846             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
8847             if (buf
[0] != '$') goto fmterr
; 
8848             len 
= strtol(buf
+1,NULL
,10); 
8849             argsds 
= sdsnewlen(NULL
,len
); 
8850             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
8851             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
8852             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
8855         /* Command lookup */ 
8856         cmd 
= lookupCommand(argv
[0]->ptr
); 
8858             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
8861         /* Try object encoding */ 
8862         if (cmd
->flags 
& REDIS_CMD_BULK
) 
8863             argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]); 
8864         /* Run the command in the context of a fake client */ 
8865         fakeClient
->argc 
= argc
; 
8866         fakeClient
->argv 
= argv
; 
8867         cmd
->proc(fakeClient
); 
8868         /* Discard the reply objects list from the fake client */ 
8869         while(listLength(fakeClient
->reply
)) 
8870             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
8871         /* Clean up, ready for the next command */ 
8872         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
8874         /* Handle swapping while loading big datasets when VM is on */ 
8876         if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32) 
8879         if (server
.vm_enabled 
&& force_swapout
) { 
8880             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
8881                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
8886     /* This point can only be reached when EOF is reached without errors. 
8887      * If the client is in the middle of a MULTI/EXEC, log error and quit. */ 
8888     if (fakeClient
->flags 
& REDIS_MULTI
) goto readerr
; 
8891     freeFakeClient(fakeClient
); 
8892     server
.appendonly 
= appendonly
; 
8897         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
8899         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
8903     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 if any of the fwrite() calls fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char header[128];
    int hlen;

    /* Build the "$<count>\r\n" prefix by hand. */
    header[0] = '$';
    hlen = ll2string(header+1,sizeof(header)-1,len) + 1;
    header[hlen] = '\r';
    header[hlen+1] = '\n';
    hlen += 2;

    if (fwrite(header,hlen,1,fp) == 0) return 0;
    /* An empty payload is legal: nothing to write between the CRLFs. */
    if (len > 0 && fwrite(s,len,1,fp) == 0) return 0;
    return (fwrite("\r\n",2,1,fp) == 0) ? 0 : 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the "%.17g" representation of 'd'.
 *
 * Returns 1 on success, 0 if a write fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    /* The advertised count does not include the trailing CRLF. */
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write a long long value in bulk format $<count>\r\n<payload>\r\n
 * The digits are produced by ll2string() and the whole frame is emitted
 * with a single fwrite().
 *
 * Returns 1 on success, 0 if the write fails. */
static int fwriteBulkLongLong(FILE *fp, long long l) {
    char frame[128], digits[128];
    unsigned int ndigits, framelen;

    ndigits = ll2string(digits,32,l);
    framelen = snprintf(frame,sizeof(frame),"$%u\r\n%s\r\n",ndigits,digits);
    return (fwrite(frame,framelen,1,fp) == 0) ? 0 : 1;
}
8943 /* Delegate writing an object to writing a bulk string or bulk long long. */ 
8944 static int fwriteBulkObject(FILE *fp
, robj 
*obj
) { 
8945     /* Avoid using getDecodedObject to help copy-on-write (we are often 
8946      * in a child process when this function is called). */ 
8947     if (obj
->encoding 
== REDIS_ENCODING_INT
) { 
8948         return fwriteBulkLongLong(fp
,(long)obj
->ptr
); 
8949     } else if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
8950         return fwriteBulkString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
8952         redisPanic("Unknown string encoding"); 
8956 /* Write a sequence of commands able to fully rebuild the dataset into 
8957  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
8958 static int rewriteAppendOnlyFile(char *filename
) { 
8959     dictIterator 
*di 
= NULL
; 
8964     time_t now 
= time(NULL
); 
8966     /* Note that we have to use a different temp name here compared to the 
8967      * one used by rewriteAppendOnlyFileBackground() function. */ 
8968     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
8969     fp 
= fopen(tmpfile
,"w"); 
8971         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
8974     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8975         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
8976         redisDb 
*db 
= server
.db
+j
; 
8978         if (dictSize(d
) == 0) continue; 
8979         di 
= dictGetIterator(d
); 
8985         /* SELECT the new DB */ 
8986         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
8987         if (fwriteBulkLongLong(fp
,j
) == 0) goto werr
; 
8989         /* Iterate this DB writing every entry */ 
8990         while((de 
= dictNext(di
)) != NULL
) { 
8991             sds keystr 
= dictGetEntryKey(de
); 
8996             keystr 
= dictGetEntryKey(de
); 
8997             o 
= dictGetEntryVal(de
); 
8998             initStaticStringObject(key
,keystr
); 
8999             /* If the value for this key is swapped, load a preview in memory. 
9000              * We use a "swapped" flag to remember if we need to free the 
9001              * value object instead to just increment the ref count anyway 
9002              * in order to avoid copy-on-write of pages if we are forked() */ 
9003             if (!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY 
|| 
9004                 o
->storage 
== REDIS_VM_SWAPPING
) { 
9007                 o 
= vmPreviewObject(o
); 
9010             expiretime 
= getExpire(db
,&key
); 
9012             /* Save the key and associated value */ 
9013             if (o
->type 
== REDIS_STRING
) { 
9014                 /* Emit a SET command */ 
9015                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
9016                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9018                 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9019                 if (fwriteBulkObject(fp
,o
) == 0) goto werr
; 
9020             } else if (o
->type 
== REDIS_LIST
) { 
9021                 /* Emit the RPUSHes needed to rebuild the list */ 
9022                 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
9023                 if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
9024                     unsigned char *zl 
= o
->ptr
; 
9025                     unsigned char *p 
= ziplistIndex(zl
,0); 
9026                     unsigned char *vstr
; 
9030                     while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
9031                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9032                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9034                             if (fwriteBulkString(fp
,(char*)vstr
,vlen
) == 0) 
9037                             if (fwriteBulkLongLong(fp
,vlong
) == 0) 
9040                         p 
= ziplistNext(zl
,p
); 
9042                 } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
9043                     list 
*list 
= o
->ptr
; 
9047                     listRewind(list
,&li
); 
9048                     while((ln 
= listNext(&li
))) { 
9049                         robj 
*eleobj 
= listNodeValue(ln
); 
9051                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9052                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9053                         if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9056                     redisPanic("Unknown list encoding"); 
9058             } else if (o
->type 
== REDIS_SET
) { 
9059                 /* Emit the SADDs needed to rebuild the set */ 
9061                 dictIterator 
*di 
= dictGetIterator(set
); 
9064                 while((de 
= dictNext(di
)) != NULL
) { 
9065                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
9066                     robj 
*eleobj 
= dictGetEntryKey(de
); 
9068                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9069                     if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9070                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9072                 dictReleaseIterator(di
); 
9073             } else if (o
->type 
== REDIS_ZSET
) { 
9074                 /* Emit the ZADDs needed to rebuild the sorted set */ 
9076                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
9079                 while((de 
= dictNext(di
)) != NULL
) { 
9080                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
9081                     robj 
*eleobj 
= dictGetEntryKey(de
); 
9082                     double *score 
= dictGetEntryVal(de
); 
9084                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9085                     if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9086                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
9087                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9089                 dictReleaseIterator(di
); 
9090             } else if (o
->type 
== REDIS_HASH
) { 
9091                 char cmd
[]="*4\r\n$4\r\nHSET\r\n"; 
9093                 /* Emit the HSETs needed to rebuild the hash */ 
9094                 if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
9095                     unsigned char *p 
= zipmapRewind(o
->ptr
); 
9096                     unsigned char *field
, *val
; 
9097                     unsigned int flen
, vlen
; 
9099                     while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
9100                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9101                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9102                         if (fwriteBulkString(fp
,(char*)field
,flen
) == -1) 
9104                         if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1) 
9108                     dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
9111                     while((de 
= dictNext(di
)) != NULL
) { 
9112                         robj 
*field 
= dictGetEntryKey(de
); 
9113                         robj 
*val 
= dictGetEntryVal(de
); 
9115                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9116                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9117                         if (fwriteBulkObject(fp
,field
) == -1) return -1; 
9118                         if (fwriteBulkObject(fp
,val
) == -1) return -1; 
9120                     dictReleaseIterator(di
); 
9123                 redisPanic("Unknown object type"); 
9125             /* Save the expire time */ 
9126             if (expiretime 
!= -1) { 
9127                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
9128                 /* If this key is already expired skip it */ 
9129                 if (expiretime 
< now
) continue; 
9130                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9131                 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9132                 if (fwriteBulkLongLong(fp
,expiretime
) == 0) goto werr
; 
9134             if (swapped
) decrRefCount(o
); 
9136         dictReleaseIterator(di
); 
9139     /* Make sure data will not remain on the OS's output buffers */ 
9141     aof_fsync(fileno(fp
)); 
9144     /* Use RENAME to make sure the DB file is changed atomically only 
9145      * if the generate DB file is ok. */ 
9146     if (rename(tmpfile
,filename
) == -1) { 
9147         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
9151     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
9157     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
9158     if (di
) dictReleaseIterator(di
); 
9162 /* This is how rewriting of the append only file in background works: 
9164  * 1) The user calls BGREWRITEAOF 
9165  * 2) Redis calls this function, that forks(): 
9166  *    2a) the child rewrite the append only file in a temp file. 
9167  *    2b) the parent accumulates differences in server.bgrewritebuf. 
9168  * 3) When the child finished '2a' exists. 
9169  * 4) The parent will trap the exit code, if it's OK, will append the 
9170  *    data accumulated into server.bgrewritebuf into the temp file, and 
9171  *    finally will rename(2) the temp file in the actual file name. 
9172  *    The the new file is reopened as the new append only file. Profit! 
9174 static int rewriteAppendOnlyFileBackground(void) { 
9177     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
9178     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
9179     if ((childpid 
= fork()) == 0) { 
9183         if (server
.vm_enabled
) vmReopenSwapFile(); 
9185         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
9186         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
9193         if (childpid 
== -1) { 
9194             redisLog(REDIS_WARNING
, 
9195                 "Can't rewrite append only file in background: fork: %s", 
9199         redisLog(REDIS_NOTICE
, 
9200             "Background append only file rewriting started by pid %d",childpid
); 
9201         server
.bgrewritechildpid 
= childpid
; 
9202         updateDictResizePolicy(); 
9203         /* We set appendseldb to -1 in order to force the next call to the 
9204          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
9205          * accumulated by the parent into server.bgrewritebuf will start 
9206          * with a SELECT statement and it will be safe to merge. */ 
9207         server
.appendseldb 
= -1; 
9210     return REDIS_OK
; /* unreached */ 
9213 static void bgrewriteaofCommand(redisClient 
*c
) { 
9214     if (server
.bgrewritechildpid 
!= -1) { 
9215         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
9218     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
9219         char *status 
= "+Background append only file rewriting started\r\n"; 
9220         addReplySds(c
,sdsnew(status
)); 
9222         addReply(c
,shared
.err
); 
/* Remove the temp file "temp-rewriteaof-bg-<childpid>.aof", i.e. the name
 * used by the background AOF rewrite child with that pid. The result of
 * unlink() is not checked. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
/* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
 * - Threaded Virtual Memory I/O
 * The two parts are not fully decoupled, but functions are split among two
 * different sections of the source code (delimited by comments) in order to
 * make more clear what functionality is about the blocking VM and what about
 * the threaded (not blocking) VM.
 *
 * Redis VM is a blocking VM (one that blocks reading swapped values from
 * disk into memory when a value swapped out is needed in memory) that is made
 * unblocking by trying to examine the command argument vector in order to
 * load in background values that will likely be needed in order to exec
 * the command. The command is executed only once all the relevant keys
 * are loaded into memory.
 *
 * This is basically almost as simple as a blocking VM, but almost as
 * parallel as a fully non-blocking VM.
 */
9254 /* =================== Virtual Memory - Blocking Side  ====================== */ 
9256 /* Create a VM pointer object. This kind of objects are used in place of 
9257  * values in the key -> value hash table, for swapped out objects. */ 
9258 static vmpointer 
*createVmPointer(int vtype
) { 
9259     vmpointer 
*vp 
= zmalloc(sizeof(vmpointer
)); 
9261     vp
->type 
= REDIS_VMPOINTER
; 
9262     vp
->storage 
= REDIS_VM_SWAPPED
; 
9267 static void vmInit(void) { 
9273     if (server
.vm_max_threads 
!= 0) 
9274         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
9276     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
9277     /* Try to open the old swap file, otherwise create it */ 
9278     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
9279         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
9281     if (server
.vm_fp 
== NULL
) { 
9282         redisLog(REDIS_WARNING
, 
9283             "Can't open the swap file: %s. Exiting.", 
9287     server
.vm_fd 
= fileno(server
.vm_fp
); 
9288     /* Lock the swap file for writing, this is useful in order to avoid 
9289      * another instance to use the same swap file for a config error. */ 
9290     fl
.l_type 
= F_WRLCK
; 
9291     fl
.l_whence 
= SEEK_SET
; 
9292     fl
.l_start 
= fl
.l_len 
= 0; 
9293     if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) { 
9294         redisLog(REDIS_WARNING
, 
9295             "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
)); 
9299     server
.vm_next_page 
= 0; 
9300     server
.vm_near_pages 
= 0; 
9301     server
.vm_stats_used_pages 
= 0; 
9302     server
.vm_stats_swapped_objects 
= 0; 
9303     server
.vm_stats_swapouts 
= 0; 
9304     server
.vm_stats_swapins 
= 0; 
9305     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
9306     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
9307     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
9308         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
9312         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
9314     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
9315     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
9316         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
9317     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
9319     /* Initialize threaded I/O (used by Virtual Memory) */ 
9320     server
.io_newjobs 
= listCreate(); 
9321     server
.io_processing 
= listCreate(); 
9322     server
.io_processed 
= listCreate(); 
9323     server
.io_ready_clients 
= listCreate(); 
9324     pthread_mutex_init(&server
.io_mutex
,NULL
); 
9325     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
9326     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
9327     server
.io_active_threads 
= 0; 
9328     if (pipe(pipefds
) == -1) { 
9329         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
9333     server
.io_ready_pipe_read 
= pipefds
[0]; 
9334     server
.io_ready_pipe_write 
= pipefds
[1]; 
9335     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
9336     /* LZF requires a lot of stack */ 
9337     pthread_attr_init(&server
.io_threads_attr
); 
9338     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
9339     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
9340     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
9341     /* Listen for events in the threaded I/O pipe */ 
9342     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
9343         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
9344         oom("creating file event"); 
9347 /* Mark the page as used */ 
9348 static void vmMarkPageUsed(off_t page
) { 
9349     off_t byte 
= page
/8; 
9351     redisAssert(vmFreePage(page
) == 1); 
9352     server
.vm_bitmap
[byte
] |= 1<<bit
; 
9355 /* Mark N contiguous pages as used, with 'page' being the first. */ 
9356 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
9359     for (j 
= 0; j 
< count
; j
++) 
9360         vmMarkPageUsed(page
+j
); 
9361     server
.vm_stats_used_pages 
+= count
; 
9362     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
9363         (long long)count
, (long long)page
); 
9366 /* Mark the page as free */ 
9367 static void vmMarkPageFree(off_t page
) { 
9368     off_t byte 
= page
/8; 
9370     redisAssert(vmFreePage(page
) == 0); 
9371     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
9374 /* Mark N contiguous pages as free, with 'page' being the first. */ 
9375 static void vmMarkPagesFree(off_t page
, off_t count
) { 
9378     for (j 
= 0; j 
< count
; j
++) 
9379         vmMarkPageFree(page
+j
); 
9380     server
.vm_stats_used_pages 
-= count
; 
9381     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
9382         (long long)count
, (long long)page
); 
9385 /* Test if the page is free */ 
9386 static int vmFreePage(off_t page
) { 
9387     off_t byte 
= page
/8; 
9389     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
9392 /* Find N contiguous free pages storing the first page of the cluster in *first. 
9393  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise 
9394  * REDIS_ERR is returned. 
9396  * This function uses a simple algorithm: we try to allocate 
9397  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
9398  * again from the start of the swap file searching for free spaces. 
9400  * If it looks pretty clear that there are no free pages near our offset 
9401  * we try to find less populated places doing a forward jump of 
9402  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
9403  * without hurry, and then we jump again and so forth... 
9405  * This function can be improved using a free list to avoid to guess 
9406  * too much, since we could collect data about freed pages. 
9408  * note: I implemented this function just after watching an episode of 
9409  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
9411 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
9412     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
9414     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
9415         server
.vm_near_pages 
= 0; 
9416         server
.vm_next_page 
= 0; 
9418     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
9419     base 
= server
.vm_next_page
; 
9421     while(offset 
< server
.vm_pages
) { 
9422         off_t 
this = base
+offset
; 
9424         /* If we overflow, restart from page zero */ 
9425         if (this >= server
.vm_pages
) { 
9426             this -= server
.vm_pages
; 
9428                 /* Just overflowed, what we found on tail is no longer 
9429                  * interesting, as it's no longer contiguous. */ 
9433         if (vmFreePage(this)) { 
9434             /* This is a free page */ 
9436             /* Already got N free pages? Return to the caller, with success */ 
9438                 *first 
= this-(n
-1); 
9439                 server
.vm_next_page 
= this+1; 
9440                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
9444             /* The current one is not a free page */ 
9448         /* Fast-forward if the current page is not free and we already 
9449          * searched enough near this place. */ 
9451         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
9452             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
9454             /* Note that even if we rewind after the jump, we are don't need 
9455              * to make sure numfree is set to zero as we only jump *if* it 
9456              * is set to zero. */ 
9458             /* Otherwise just check the next page */ 
9465 /* Write the specified object at the specified page of the swap file */ 
9466 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
9467     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
9468     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
9469         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9470         redisLog(REDIS_WARNING
, 
9471             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
9475     rdbSaveObject(server
.vm_fp
,o
); 
9476     fflush(server
.vm_fp
); 
9477     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9481 /* Transfers the 'val' object to disk. Store all the information 
9482  * a 'vmpointer' object containing all the information needed to load the 
9483  * object back later is returned. 
9485  * If we can't find enough contiguous empty pages to swap the object on disk 
9486  * NULL is returned. */ 
9487 static vmpointer 
*vmSwapObjectBlocking(robj 
*val
) { 
9488     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
9492     assert(val
->storage 
== REDIS_VM_MEMORY
); 
9493     assert(val
->refcount 
== 1); 
9494     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
; 
9495     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
; 
9497     vp 
= createVmPointer(val
->type
); 
9499     vp
->usedpages 
= pages
; 
9500     decrRefCount(val
); /* Deallocate the object from memory. */ 
9501     vmMarkPagesUsed(page
,pages
); 
9502     redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)", 
9504         (unsigned long long) page
, (unsigned long long) pages
); 
9505     server
.vm_stats_swapped_objects
++; 
9506     server
.vm_stats_swapouts
++; 
9510 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
9513     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
9514     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
9515         redisLog(REDIS_WARNING
, 
9516             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
9520     o 
= rdbLoadObject(type
,server
.vm_fp
); 
9522         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
9525     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9529 /* Load the specified object from swap to memory. 
9530  * The newly allocated object is returned. 
9532  * If preview is true the unserialized object is returned to the caller but 
9533  * the pages are not marked as freed, nor the vp object is freed. */ 
9534 static robj 
*vmGenericLoadObject(vmpointer 
*vp
, int preview
) { 
9537     redisAssert(vp
->type 
== REDIS_VMPOINTER 
&& 
9538         (vp
->storage 
== REDIS_VM_SWAPPED 
|| vp
->storage 
== REDIS_VM_LOADING
)); 
9539     val 
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
); 
9541         redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
); 
9542         vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
9544         server
.vm_stats_swapped_objects
--; 
9546         redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
); 
9548     server
.vm_stats_swapins
++; 
9552 /* Plain object loading, from swap to memory. 
9554  * 'o' is actually a redisVmPointer structure that will be freed by the call. 
9555  * The return value is the loaded object. */ 
9556 static robj 
*vmLoadObject(robj 
*o
) { 
9557     /* If we are loading the object in background, stop it, we 
9558      * need to load this object synchronously ASAP. */ 
9559     if (o
->storage 
== REDIS_VM_LOADING
) 
9560         vmCancelThreadedIOJob(o
); 
9561     return vmGenericLoadObject((vmpointer
*)o
,0); 
9564 /* Just load the value on disk, without to modify the key. 
9565  * This is useful when we want to perform some operation on the value 
9566  * without to really bring it from swap to memory, like while saving the 
9567  * dataset or rewriting the append only log. */ 
9568 static robj 
*vmPreviewObject(robj 
*o
) { 
9569     return vmGenericLoadObject((vmpointer
*)o
,1); 
9572 /* How a good candidate is this object for swapping? 
9573  * The better candidate it is, the greater the returned value. 
9575  * Currently we try to perform a fast estimation of the object size in 
9576  * memory, and combine it with aging informations. 
9578  * Basically swappability = idle-time * log(estimated size) 
9580  * Bigger objects are preferred over smaller objects, but not 
9581  * proportionally, this is why we use the logarithm. This algorithm is 
9582  * just a first try and will probably be tuned later. */ 
9583 static double computeObjectSwappability(robj 
*o
) { 
9584     /* actual age can be >= minage, but not < minage. As we use wrapping 
9585      * 21 bit clocks with minutes resolution for the LRU. */ 
9586     time_t minage 
= abs(server
.lruclock 
- o
->lru
); 
9587     long asize 
= 0, elesize
; 
9592     struct dictEntry 
*de
; 
9595     if (minage 
<= 0) return 0; 
9598         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
9601             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
9605         if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
9606             asize 
= sizeof(*o
)+ziplistSize(o
->ptr
); 
9610             asize 
= sizeof(list
); 
9613                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9614                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9615                 asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
9621         z 
= (o
->type 
== REDIS_ZSET
); 
9622         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
9624         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9625         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
9627             de 
= dictGetRandomKey(d
); 
9628             ele 
= dictGetEntryKey(de
); 
9629             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9630                             (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9631             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9632             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
9636         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
9637             unsigned char *p 
= zipmapRewind((unsigned char*)o
->ptr
); 
9638             unsigned int len 
= zipmapLen((unsigned char*)o
->ptr
); 
9639             unsigned int klen
, vlen
; 
9640             unsigned char *key
, *val
; 
9642             if ((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) { 
9646             asize 
= len
*(klen
+vlen
+3); 
9647         } else if (o
->encoding 
== REDIS_ENCODING_HT
) { 
9649             asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9651                 de 
= dictGetRandomKey(d
); 
9652                 ele 
= dictGetEntryKey(de
); 
9653                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9654                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9655                 ele 
= dictGetEntryVal(de
); 
9656                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9657                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9658                 asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9663     return (double)minage
*log(1+asize
); 
9666 /* Try to swap an object that's a good candidate for swapping. 
9667  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
9668  * to swap any object at all. 
9670  * If 'usethreaded' is true, Redis will try to swap the object in background 
9671  * using I/O threads. */ 
9672 static int vmSwapOneObject(int usethreads
) { 
9674     struct dictEntry 
*best 
= NULL
; 
9675     double best_swappability 
= 0; 
9676     redisDb 
*best_db 
= NULL
; 
9680     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
9681         redisDb 
*db 
= server
.db
+j
; 
9682         /* Why maxtries is set to 100? 
9683          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
9684          * are swappable objects */ 
9687         if (dictSize(db
->dict
) == 0) continue; 
9688         for (i 
= 0; i 
< 5; i
++) { 
9690             double swappability
; 
9692             if (maxtries
) maxtries
--; 
9693             de 
= dictGetRandomKey(db
->dict
); 
9694             val 
= dictGetEntryVal(de
); 
9695             /* Only swap objects that are currently in memory. 
9697              * Also don't swap shared objects: not a good idea in general and 
9698              * we need to ensure that the main thread does not touch the 
9699              * object while the I/O thread is using it, but we can't 
9700              * control other keys without adding additional mutex. */ 
9701             if (val
->storage 
!= REDIS_VM_MEMORY 
|| val
->refcount 
!= 1) { 
9702                 if (maxtries
) i
--; /* don't count this try */ 
9705             swappability 
= computeObjectSwappability(val
); 
9706             if (!best 
|| swappability 
> best_swappability
) { 
9708                 best_swappability 
= swappability
; 
9713     if (best 
== NULL
) return REDIS_ERR
; 
9714     key 
= dictGetEntryKey(best
); 
9715     val 
= dictGetEntryVal(best
); 
9717     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
9718         key
, best_swappability
); 
9722         robj 
*keyobj 
= createStringObject(key
,sdslen(key
)); 
9723         vmSwapObjectThreaded(keyobj
,val
,best_db
); 
9724         decrRefCount(keyobj
); 
9729         if ((vp 
= vmSwapObjectBlocking(val
)) != NULL
) { 
9730             dictGetEntryVal(best
) = vp
; 
/* Swap out one object synchronously: vmSwapOneObject() without using the
 * I/O threads. Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object in background: vmSwapOneObject() using the I/O
 * threads. Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9746 /* Return true if it's safe to swap out objects in a given moment. 
9747  * Basically we don't want to swap objects out while there is a BGSAVE 
9748  * or a BGAEOREWRITE running in backgroud. */ 
9749 static int vmCanSwapOut(void) { 
9750     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
9753 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
9755 static void freeIOJob(iojob 
*j
) { 
9756     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
9757         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
9758         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
9760          /* we fix the storage type, otherwise decrRefCount() will try to 
9761           * kill the I/O thread Job (that does no longer exists). */ 
9762         if (j
->val
->storage 
== REDIS_VM_SWAPPING
) 
9763             j
->val
->storage 
= REDIS_VM_MEMORY
; 
9764         decrRefCount(j
->val
); 
9766     decrRefCount(j
->key
); 
9770 /* Every time a thread finished a Job, it writes a byte into the write side 
9771  * of an unix pipe in order to "awake" the main thread, and this function 
9773 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
9777     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
9779     REDIS_NOTUSED(mask
); 
9780     REDIS_NOTUSED(privdata
); 
9782     /* For every byte we read in the read side of the pipe, there is one 
9783      * I/O job completed to process. */ 
9784     while((retval 
= read(fd
,buf
,1)) == 1) { 
9787         struct dictEntry 
*de
; 
9789         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
9791         /* Get the processed element (the oldest one) */ 
9793         assert(listLength(server
.io_processed
) != 0); 
9794         if (toprocess 
== -1) { 
9795             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
9796             if (toprocess 
<= 0) toprocess 
= 1; 
9798         ln 
= listFirst(server
.io_processed
); 
9800         listDelNode(server
.io_processed
,ln
); 
9802         /* If this job is marked as canceled, just ignore it */ 
9807         /* Post process it in the main thread, as there are things we 
9808          * can do just here to avoid race conditions and/or invasive locks */ 
9809         redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
); 
9810         de 
= dictFind(j
->db
->dict
,j
->key
->ptr
); 
9811         redisAssert(de 
!= NULL
); 
9812         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
9814             vmpointer 
*vp 
= dictGetEntryVal(de
); 
9816             /* Key loaded, bring it at home */ 
9817             vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
9818             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
9819                 (unsigned char*) j
->key
->ptr
); 
9820             server
.vm_stats_swapped_objects
--; 
9821             server
.vm_stats_swapins
++; 
9822             dictGetEntryVal(de
) = j
->val
; 
9823             incrRefCount(j
->val
); 
9825             /* Handle clients waiting for this key to be loaded. */ 
9826             handleClientsBlockedOnSwappedKey(db
,j
->key
); 
9829         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
9830             /* Now we know the amount of pages required to swap this object. 
9831              * Let's find some space for it, and queue this task again 
9832              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
9833             if (!vmCanSwapOut() || 
9834                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
9836                 /* Ooops... no space or we can't swap as there is 
9837                  * a fork()ed Redis trying to save stuff on disk. */ 
9838                 j
->val
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
9841                 /* Note that we need to mark this pages as used now, 
9842                  * if the job will be canceled, we'll mark them as freed 
9844                 vmMarkPagesUsed(j
->page
,j
->pages
); 
9845                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
9850         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
9853             /* Key swapped. We can finally free some memory. */ 
9854             if (j
->val
->storage 
!= REDIS_VM_SWAPPING
) { 
9855                 vmpointer 
*vp 
= (vmpointer
*) j
->id
; 
9856                 printf("storage: %d\n",vp
->storage
); 
9857                 printf("key->name: %s\n",(char*)j
->key
->ptr
); 
9858                 printf("val: %p\n",(void*)j
->val
); 
9859                 printf("val->type: %d\n",j
->val
->type
); 
9860                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
9862             redisAssert(j
->val
->storage 
== REDIS_VM_SWAPPING
); 
9863             vp 
= createVmPointer(j
->val
->type
); 
9865             vp
->usedpages 
= j
->pages
; 
9866             dictGetEntryVal(de
) = vp
; 
9867             /* Fix the storage otherwise decrRefCount will attempt to 
9868              * remove the associated I/O job */ 
9869             j
->val
->storage 
= REDIS_VM_MEMORY
; 
9870             decrRefCount(j
->val
); 
9871             redisLog(REDIS_DEBUG
, 
9872                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
9873                 (unsigned char*) j
->key
->ptr
, 
9874                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
9875             server
.vm_stats_swapped_objects
++; 
9876             server
.vm_stats_swapouts
++; 
9878             /* Put a few more swap requests in queue if we are still 
9880             if (trytoswap 
&& vmCanSwapOut() && 
9881                 zmalloc_used_memory() > server
.vm_max_memory
) 
9886                     more 
= listLength(server
.io_newjobs
) < 
9887                             (unsigned) server
.vm_max_threads
; 
9889                     /* Don't waste CPU time if swappable objects are rare. */ 
9890                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
9898         if (processed 
== toprocess
) return; 
9900     if (retval 
< 0 && errno 
!= EAGAIN
) { 
9901         redisLog(REDIS_WARNING
, 
9902             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
9907 static void lockThreadedIO(void) { 
9908     pthread_mutex_lock(&server
.io_mutex
); 
9911 static void unlockThreadedIO(void) { 
9912     pthread_mutex_unlock(&server
.io_mutex
); 
9915 /* Remove the specified object from the threaded I/O queue if still not 
9916  * processed, otherwise make sure to flag it as canceled. */ 
9917 static void vmCancelThreadedIOJob(robj 
*o
) { 
9919         server
.io_newjobs
,      /* 0 */ 
9920         server
.io_processing
,   /* 1 */ 
9921         server
.io_processed     
/* 2 */ 
9925     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
9928     /* Search for a matching object in one of the queues */ 
9929     for (i 
= 0; i 
< 3; i
++) { 
9933         listRewind(lists
[i
],&li
); 
9934         while ((ln 
= listNext(&li
)) != NULL
) { 
9935             iojob 
*job 
= ln
->value
; 
9937             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
9939                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n", 
9940                     (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
); 
9941                 /* Mark the pages as free since the swap didn't happened 
9942                  * or happened but is now discarded. */ 
9943                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
9944                     vmMarkPagesFree(job
->page
,job
->pages
); 
9945                 /* Cancel the job. It depends on the list the job is 
9948                 case 0: /* io_newjobs */ 
9949                     /* If the job was yet not processed the best thing to do 
9950                      * is to remove it from the queue at all */ 
9952                     listDelNode(lists
[i
],ln
); 
9954                 case 1: /* io_processing */ 
9955                     /* Oh Shi- the thread is messing with the Job: 
9957                      * Probably it's accessing the object if this is a 
9958                      * PREPARE_SWAP or DO_SWAP job. 
9959                      * If it's a LOAD job it may be reading from disk and 
9960                      * if we don't wait for the job to terminate before to 
9961                      * cancel it, maybe in a few microseconds data can be 
9962                      * corrupted in this pages. So the short story is: 
9964                      * Better to wait for the job to move into the 
9965                      * next queue (processed)... */ 
9967                     /* We try again and again until the job is completed. */ 
9969                     /* But let's wait some time for the I/O thread 
9970                      * to finish with this job. After all this condition 
9971                      * should be very rare. */ 
9974                 case 2: /* io_processed */ 
9975                     /* The job was already processed, that's easy... 
9976                      * just mark it as canceled so that we'll ignore it 
9977                      * when processing completed jobs. */ 
9981                 /* Finally we have to adjust the storage type of the object 
9982                  * in order to "UNDO" the operaiton. */ 
9983                 if (o
->storage 
== REDIS_VM_LOADING
) 
9984                     o
->storage 
= REDIS_VM_SWAPPED
; 
9985                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
9986                     o
->storage 
= REDIS_VM_MEMORY
; 
9988                 redisLog(REDIS_DEBUG
,"*** DONE"); 
9994     printf("Not found: %p\n", (void*)o
); 
9995     redisAssert(1 != 1); /* We should never reach this */ 
9998 static void *IOThreadEntryPoint(void *arg
) { 
10001     REDIS_NOTUSED(arg
); 
10003     pthread_detach(pthread_self()); 
10005         /* Get a new job to process */ 
10007         if (listLength(server
.io_newjobs
) == 0) { 
10008             /* No new jobs in queue, exit. */ 
10009             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
10010                 (long) pthread_self()); 
10011             server
.io_active_threads
--; 
10012             unlockThreadedIO(); 
10015         ln 
= listFirst(server
.io_newjobs
); 
10017         listDelNode(server
.io_newjobs
,ln
); 
10018         /* Add the job in the processing queue */ 
10019         j
->thread 
= pthread_self(); 
10020         listAddNodeTail(server
.io_processing
,j
); 
10021         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
10022         unlockThreadedIO(); 
10023         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
10024             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
10026         /* Process the Job */ 
10027         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
10028             vmpointer 
*vp 
= (vmpointer
*)j
->id
; 
10029             j
->val 
= vmReadObjectFromSwap(j
->page
,vp
->vtype
); 
10030         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
10031             FILE *fp 
= fopen("/dev/null","w+"); 
10032             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
10034         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
10035             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
10039         /* Done: insert the job into the processed queue */ 
10040         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
10041             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
10043         listDelNode(server
.io_processing
,ln
); 
10044         listAddNodeTail(server
.io_processed
,j
); 
10045         unlockThreadedIO(); 
10047         /* Signal the main thread there is new stuff to process */ 
10048         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
10050     return NULL
; /* never reached */ 
10053 static void spawnIOThread(void) { 
10055     sigset_t mask
, omask
; 
10058     sigemptyset(&mask
); 
10059     sigaddset(&mask
,SIGCHLD
); 
10060     sigaddset(&mask
,SIGHUP
); 
10061     sigaddset(&mask
,SIGPIPE
); 
10062     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
10063     while ((err 
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) { 
10064         redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s", 
10068     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
10069     server
.io_active_threads
++; 
10072 /* We need to wait for the last thread to exit before we are able to 
10073  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
10074 static void waitEmptyIOJobsQueue(void) { 
10076         int io_processed_len
; 
10079         if (listLength(server
.io_newjobs
) == 0 && 
10080             listLength(server
.io_processing
) == 0 && 
10081             server
.io_active_threads 
== 0) 
10083             unlockThreadedIO(); 
10086         /* While waiting for empty jobs queue condition we post-process some 
10087          * finshed job, as I/O threads may be hanging trying to write against 
10088          * the io_ready_pipe_write FD but there are so much pending jobs that 
10089          * it's blocking. */ 
10090         io_processed_len 
= listLength(server
.io_processed
); 
10091         unlockThreadedIO(); 
10092         if (io_processed_len
) { 
10093             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
10094             usleep(1000); /* 1 millisecond */ 
10096             usleep(10000); /* 10 milliseconds */ 
10101 static void vmReopenSwapFile(void) { 
10102     /* Note: we don't close the old one as we are in the child process 
10103      * and don't want to mess at all with the original file object. */ 
10104     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
10105     if (server
.vm_fp 
== NULL
) { 
10106         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
10107             server
.vm_swap_file
); 
10110     server
.vm_fd 
= fileno(server
.vm_fp
); 
10113 /* This function must be called while with threaded IO locked */ 
10114 static void queueIOJob(iojob 
*j
) { 
10115     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
10116         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
10117     listAddNodeTail(server
.io_newjobs
,j
); 
10118     if (server
.io_active_threads 
< server
.vm_max_threads
) 
10122 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
10125     j 
= zmalloc(sizeof(*j
)); 
10126     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
10130     j
->id 
= j
->val 
= val
; 
10133     j
->thread 
= (pthread_t
) -1; 
10134     val
->storage 
= REDIS_VM_SWAPPING
; 
10138     unlockThreadedIO(); 
10142 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
10144 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded. 
10145  * If there is not already a job loading the key, it is craeted. 
10146  * The key is added to the io_keys list in the client structure, and also 
10147  * in the hash table mapping swapped keys to waiting clients, that is, 
10148  * server.io_waited_keys. */ 
10149 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
10150     struct dictEntry 
*de
; 
10154     /* If the key does not exist or is already in RAM we don't need to 
10155      * block the client at all. */ 
10156     de 
= dictFind(c
->db
->dict
,key
->ptr
); 
10157     if (de 
== NULL
) return 0; 
10158     o 
= dictGetEntryVal(de
); 
10159     if (o
->storage 
== REDIS_VM_MEMORY
) { 
10161     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
10162         /* We were swapping the key, undo it! */ 
10163         vmCancelThreadedIOJob(o
); 
10167     /* OK: the key is either swapped, or being loaded just now. */ 
10169     /* Add the key to the list of keys this client is waiting for. 
10170      * This maps clients to keys they are waiting for. */ 
10171     listAddNodeTail(c
->io_keys
,key
); 
10174     /* Add the client to the swapped keys => clients waiting map. */ 
10175     de 
= dictFind(c
->db
->io_keys
,key
); 
10179         /* For every key we take a list of clients blocked for it */ 
10181         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
10183         assert(retval 
== DICT_OK
); 
10185         l 
= dictGetEntryVal(de
); 
10187     listAddNodeTail(l
,c
); 
10189     /* Are we already loading the key from disk? If not create a job */ 
10190     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
10192         vmpointer 
*vp 
= (vmpointer
*)o
; 
10194         o
->storage 
= REDIS_VM_LOADING
; 
10195         j 
= zmalloc(sizeof(*j
)); 
10196         j
->type 
= REDIS_IOJOB_LOAD
; 
10201         j
->page 
= vp
->page
; 
10204         j
->thread 
= (pthread_t
) -1; 
10207         unlockThreadedIO(); 
10212 /* Preload keys for any command with first, last and step values for 
10213  * the command keys prototype, as defined in the command table. */ 
10214 static void waitForMultipleSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10216     if (cmd
->vm_firstkey 
== 0) return; 
10217     last 
= cmd
->vm_lastkey
; 
10218     if (last 
< 0) last 
= argc
+last
; 
10219     for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) { 
10220         redisAssert(j 
< argc
); 
10221         waitForSwappedKey(c
,argv
[j
]); 
10225 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands. 
10226  * Note that the number of keys to preload is user-defined, so we need to 
10227  * apply a sanity check against argc. */ 
10228 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10230     REDIS_NOTUSED(cmd
); 
10232     num 
= atoi(argv
[2]->ptr
); 
10233     if (num 
> (argc
-3)) return; 
10234     for (i 
= 0; i 
< num
; i
++) { 
10235         waitForSwappedKey(c
,argv
[3+i
]); 
10239 /* Preload keys needed to execute the entire MULTI/EXEC block. 
10241  * This function is called by blockClientOnSwappedKeys when EXEC is issued, 
10242  * and will block the client when any command requires a swapped out value. */ 
10243 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10245     struct redisCommand 
*mcmd
; 
10247     REDIS_NOTUSED(cmd
); 
10248     REDIS_NOTUSED(argc
); 
10249     REDIS_NOTUSED(argv
); 
10251     if (!(c
->flags 
& REDIS_MULTI
)) return; 
10252     for (i 
= 0; i 
< c
->mstate
.count
; i
++) { 
10253         mcmd 
= c
->mstate
.commands
[i
].cmd
; 
10254         margc 
= c
->mstate
.commands
[i
].argc
; 
10255         margv 
= c
->mstate
.commands
[i
].argv
; 
10257         if (mcmd
->vm_preload_proc 
!= NULL
) { 
10258             mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
); 
10260             waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
); 
10265 /* Is this client attempting to run a command against swapped keys? 
10266  * If so, block it ASAP, load the keys in background, then resume it. 
10268  * The important idea about this function is that it can fail! If keys will 
10269  * still be swapped when the client is resumed, this key lookups will 
10270  * just block loading keys from disk. In practical terms this should only 
10271  * happen with SORT BY command or if there is a bug in this function. 
10273  * Return 1 if the client is marked as blocked, 0 if the client can 
10274  * continue as the keys it is going to access appear to be in memory. */ 
10275 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
) { 
10276     if (cmd
->vm_preload_proc 
!= NULL
) { 
10277         cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
); 
10279         waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
); 
10282     /* If the client was blocked for at least one key, mark it as blocked. */ 
10283     if (listLength(c
->io_keys
)) { 
10284         c
->flags 
|= REDIS_IO_WAIT
; 
10285         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
10286         server
.vm_blocked_clients
++; 
10293 /* Remove the 'key' from the list of blocked keys for a given client. 
10295  * The function returns 1 when there are no longer blocking keys after 
10296  * the current one was removed (and the client can be unblocked). */ 
10297 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
10301     struct dictEntry 
*de
; 
10303     /* Remove the key from the list of keys this client is waiting for. */ 
10304     listRewind(c
->io_keys
,&li
); 
10305     while ((ln 
= listNext(&li
)) != NULL
) { 
10306         if (equalStringObjects(ln
->value
,key
)) { 
10307             listDelNode(c
->io_keys
,ln
); 
10311     assert(ln 
!= NULL
); 
10313     /* Remove the client from the key => waiting clients map. */ 
10314     de 
= dictFind(c
->db
->io_keys
,key
); 
10315     assert(de 
!= NULL
); 
10316     l 
= dictGetEntryVal(de
); 
10317     ln 
= listSearchKey(l
,c
); 
10318     assert(ln 
!= NULL
); 
10320     if (listLength(l
) == 0) 
10321         dictDelete(c
->db
->io_keys
,key
); 
10323     return listLength(c
->io_keys
) == 0; 
10326 /* Every time we know that a key was loaded back in memory, we handle the 
10327  * clients waiting for this key, if any. */ 
10328 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
10329     struct dictEntry 
*de
; 
10334     de 
= dictFind(db
->io_keys
,key
); 
10337     l 
= dictGetEntryVal(de
); 
10338     len 
= listLength(l
); 
10339     /* Note: we can't use something like while(listLength(l)) as the list 
10340      * can be freed by the calling function when we remove the last element. */ 
10343         redisClient 
*c 
= ln
->value
; 
10345         if (dontWaitForSwappedKey(c
,key
)) { 
10346             /* Put the client in the list of clients ready to go as we 
10347              * loaded all the keys about it. */ 
10348             listAddNodeTail(server
.io_ready_clients
,c
); 
10353 /* =========================== Remote Configuration ========================= */ 
10355 static void configSetCommand(redisClient 
*c
) { 
10356     robj 
*o 
= getDecodedObject(c
->argv
[3]); 
10359     if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) { 
10360         zfree(server
.dbfilename
); 
10361         server
.dbfilename 
= zstrdup(o
->ptr
); 
10362     } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) { 
10363         zfree(server
.requirepass
); 
10364         server
.requirepass 
= zstrdup(o
->ptr
); 
10365     } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) { 
10366         zfree(server
.masterauth
); 
10367         server
.masterauth 
= zstrdup(o
->ptr
); 
10368     } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) { 
10369         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
10370             ll 
< 0) goto badfmt
; 
10371         server
.maxmemory 
= ll
; 
10372     } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) { 
10373         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
10374             ll 
< 0 || ll 
> LONG_MAX
) goto badfmt
; 
10375         server
.maxidletime 
= ll
; 
10376     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) { 
10377         if (!strcasecmp(o
->ptr
,"no")) { 
10378             server
.appendfsync 
= APPENDFSYNC_NO
; 
10379         } else if (!strcasecmp(o
->ptr
,"everysec")) { 
10380             server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
10381         } else if (!strcasecmp(o
->ptr
,"always")) { 
10382             server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
10386     } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) { 
10387         int yn 
= yesnotoi(o
->ptr
); 
10389         if (yn 
== -1) goto badfmt
; 
10390         server
.no_appendfsync_on_rewrite 
= yn
; 
10391     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) { 
10392         int old 
= server
.appendonly
; 
10393         int new = yesnotoi(o
->ptr
); 
10395         if (new == -1) goto badfmt
; 
10400                 if (startAppendOnly() == REDIS_ERR
) { 
10401                     addReplySds(c
,sdscatprintf(sdsempty(), 
10402                         "-ERR Unable to turn on AOF. Check server logs.\r\n")); 
10408     } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) { 
10410         sds 
*v 
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
); 
10412         /* Perform sanity check before setting the new config: 
10413          * - Even number of args 
10414          * - Seconds >= 1, changes >= 0 */ 
10416             sdsfreesplitres(v
,vlen
); 
10419         for (j 
= 0; j 
< vlen
; j
++) { 
10423             val 
= strtoll(v
[j
], &eptr
, 10); 
10424             if (eptr
[0] != '\0' || 
10425                 ((j 
& 1) == 0 && val 
< 1) || 
10426                 ((j 
& 1) == 1 && val 
< 0)) { 
10427                 sdsfreesplitres(v
,vlen
); 
10431         /* Finally set the new config */ 
10432         resetServerSaveParams(); 
10433         for (j 
= 0; j 
< vlen
; j 
+= 2) { 
10437             seconds 
= strtoll(v
[j
],NULL
,10); 
10438             changes 
= strtoll(v
[j
+1],NULL
,10); 
10439             appendServerSaveParams(seconds
, changes
); 
10441         sdsfreesplitres(v
,vlen
); 
10443         addReplySds(c
,sdscatprintf(sdsempty(), 
10444             "-ERR not supported CONFIG parameter %s\r\n", 
10445             (char*)c
->argv
[2]->ptr
)); 
10450     addReply(c
,shared
.ok
); 
10453 badfmt
: /* Bad format errors */ 
10454     addReplySds(c
,sdscatprintf(sdsempty(), 
10455         "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n", 
10457             (char*)c
->argv
[2]->ptr
)); 
10461 static void configGetCommand(redisClient 
*c
) { 
10462     robj 
*o 
= getDecodedObject(c
->argv
[2]); 
10463     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
10464     char *pattern 
= o
->ptr
; 
10467     addReply(c
,lenobj
); 
10468     decrRefCount(lenobj
); 
10470     if (stringmatch(pattern
,"dbfilename",0)) { 
10471         addReplyBulkCString(c
,"dbfilename"); 
10472         addReplyBulkCString(c
,server
.dbfilename
); 
10475     if (stringmatch(pattern
,"requirepass",0)) { 
10476         addReplyBulkCString(c
,"requirepass"); 
10477         addReplyBulkCString(c
,server
.requirepass
); 
10480     if (stringmatch(pattern
,"masterauth",0)) { 
10481         addReplyBulkCString(c
,"masterauth"); 
10482         addReplyBulkCString(c
,server
.masterauth
); 
10485     if (stringmatch(pattern
,"maxmemory",0)) { 
10488         ll2string(buf
,128,server
.maxmemory
); 
10489         addReplyBulkCString(c
,"maxmemory"); 
10490         addReplyBulkCString(c
,buf
); 
10493     if (stringmatch(pattern
,"timeout",0)) { 
10496         ll2string(buf
,128,server
.maxidletime
); 
10497         addReplyBulkCString(c
,"timeout"); 
10498         addReplyBulkCString(c
,buf
); 
10501     if (stringmatch(pattern
,"appendonly",0)) { 
10502         addReplyBulkCString(c
,"appendonly"); 
10503         addReplyBulkCString(c
,server
.appendonly 
? "yes" : "no"); 
10506     if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) { 
10507         addReplyBulkCString(c
,"no-appendfsync-on-rewrite"); 
10508         addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite 
? "yes" : "no"); 
10511     if (stringmatch(pattern
,"appendfsync",0)) { 
10514         switch(server
.appendfsync
) { 
10515         case APPENDFSYNC_NO
: policy 
= "no"; break; 
10516         case APPENDFSYNC_EVERYSEC
: policy 
= "everysec"; break; 
10517         case APPENDFSYNC_ALWAYS
: policy 
= "always"; break; 
10518         default: policy 
= "unknown"; break; /* too harmless to panic */ 
10520         addReplyBulkCString(c
,"appendfsync"); 
10521         addReplyBulkCString(c
,policy
); 
10524     if (stringmatch(pattern
,"save",0)) { 
10525         sds buf 
= sdsempty(); 
10528         for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
10529             buf 
= sdscatprintf(buf
,"%ld %d", 
10530                     server
.saveparams
[j
].seconds
, 
10531                     server
.saveparams
[j
].changes
); 
10532             if (j 
!= server
.saveparamslen
-1) 
10533                 buf 
= sdscatlen(buf
," ",1); 
10535         addReplyBulkCString(c
,"save"); 
10536         addReplyBulkCString(c
,buf
); 
10541     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2); 
10544 static void configCommand(redisClient 
*c
) { 
10545     if (!strcasecmp(c
->argv
[1]->ptr
,"set")) { 
10546         if (c
->argc 
!= 4) goto badarity
; 
10547         configSetCommand(c
); 
10548     } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) { 
10549         if (c
->argc 
!= 3) goto badarity
; 
10550         configGetCommand(c
); 
10551     } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) { 
10552         if (c
->argc 
!= 2) goto badarity
; 
10553         server
.stat_numcommands 
= 0; 
10554         server
.stat_numconnections 
= 0; 
10555         server
.stat_expiredkeys 
= 0; 
10556         server
.stat_starttime 
= time(NULL
); 
10557         addReply(c
,shared
.ok
); 
10559         addReplySds(c
,sdscatprintf(sdsempty(), 
10560             "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n")); 
10565     addReplySds(c
,sdscatprintf(sdsempty(), 
10566         "-ERR Wrong number of arguments for CONFIG %s\r\n", 
10567         (char*) c
->argv
[1]->ptr
)); 
10570 /* =========================== Pubsub implementation ======================== */ 
10572 static void freePubsubPattern(void *p
) { 
10573     pubsubPattern 
*pat 
= p
; 
10575     decrRefCount(pat
->pattern
); 
10579 static int listMatchPubsubPattern(void *a
, void *b
) { 
10580     pubsubPattern 
*pa 
= a
, *pb 
= b
; 
10582     return (pa
->client 
== pb
->client
) && 
10583            (equalStringObjects(pa
->pattern
,pb
->pattern
)); 
10586 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or 
10587  * 0 if the client was already subscribed to that channel. */ 
10588 static int pubsubSubscribeChannel(redisClient 
*c
, robj 
*channel
) { 
10589     struct dictEntry 
*de
; 
10590     list 
*clients 
= NULL
; 
10593     /* Add the channel to the client -> channels hash table */ 
10594     if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) { 
10596         incrRefCount(channel
); 
10597         /* Add the client to the channel -> list of clients hash table */ 
10598         de 
= dictFind(server
.pubsub_channels
,channel
); 
10600             clients 
= listCreate(); 
10601             dictAdd(server
.pubsub_channels
,channel
,clients
); 
10602             incrRefCount(channel
); 
10604             clients 
= dictGetEntryVal(de
); 
10606         listAddNodeTail(clients
,c
); 
10608     /* Notify the client */ 
10609     addReply(c
,shared
.mbulk3
); 
10610     addReply(c
,shared
.subscribebulk
); 
10611     addReplyBulk(c
,channel
); 
10612     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10616 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
10617  * 0 if the client was not subscribed to the specified channel. */ 
10618 static int pubsubUnsubscribeChannel(redisClient 
*c
, robj 
*channel
, int notify
) { 
10619     struct dictEntry 
*de
; 
10624     /* Remove the channel from the client -> channels hash table */ 
10625     incrRefCount(channel
); /* channel may be just a pointer to the same object 
10626                             we have in the hash tables. Protect it... */ 
10627     if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) { 
10629         /* Remove the client from the channel -> clients list hash table */ 
10630         de 
= dictFind(server
.pubsub_channels
,channel
); 
10631         assert(de 
!= NULL
); 
10632         clients 
= dictGetEntryVal(de
); 
10633         ln 
= listSearchKey(clients
,c
); 
10634         assert(ln 
!= NULL
); 
10635         listDelNode(clients
,ln
); 
10636         if (listLength(clients
) == 0) { 
10637             /* Free the list and the associated hash entry altogether if this 
10638              * was the last client, so that it will not be possible to abuse 
10639              * Redis PUBSUB by creating millions of channels. */ 
10640             dictDelete(server
.pubsub_channels
,channel
); 
10643     /* Notify the client */ 
10645         addReply(c
,shared
.mbulk3
); 
10646         addReply(c
,shared
.unsubscribebulk
); 
10647         addReplyBulk(c
,channel
); 
10648         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10649                        listLength(c
->pubsub_patterns
)); 
10652     decrRefCount(channel
); /* it is finally safe to release it */ 
10656 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */ 
10657 static int pubsubSubscribePattern(redisClient 
*c
, robj 
*pattern
) { 
10660     if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) { 
10662         pubsubPattern 
*pat
; 
10663         listAddNodeTail(c
->pubsub_patterns
,pattern
); 
10664         incrRefCount(pattern
); 
10665         pat 
= zmalloc(sizeof(*pat
)); 
10666         pat
->pattern 
= getDecodedObject(pattern
); 
10668         listAddNodeTail(server
.pubsub_patterns
,pat
); 
10670     /* Notify the client */ 
10671     addReply(c
,shared
.mbulk3
); 
10672     addReply(c
,shared
.psubscribebulk
); 
10673     addReplyBulk(c
,pattern
); 
10674     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10678 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or 
10679  * 0 if the client was not subscribed to the specified pattern. */ 
10680 static int pubsubUnsubscribePattern(redisClient 
*c
, robj 
*pattern
, int notify
) { 
10685     incrRefCount(pattern
); /* Protect the object. May be the same we remove */ 
10686     if ((ln 
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) { 
10688         listDelNode(c
->pubsub_patterns
,ln
); 
10690         pat
.pattern 
= pattern
; 
10691         ln 
= listSearchKey(server
.pubsub_patterns
,&pat
); 
10692         listDelNode(server
.pubsub_patterns
,ln
); 
10694     /* Notify the client */ 
10696         addReply(c
,shared
.mbulk3
); 
10697         addReply(c
,shared
.punsubscribebulk
); 
10698         addReplyBulk(c
,pattern
); 
10699         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10700                        listLength(c
->pubsub_patterns
)); 
10702     decrRefCount(pattern
); 
10706 /* Unsubscribe from all the channels. Return the number of channels the 
10707  * client was subscribed from. */ 
10708 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
) { 
10709     dictIterator 
*di 
= dictGetIterator(c
->pubsub_channels
); 
10713     while((de 
= dictNext(di
)) != NULL
) { 
10714         robj 
*channel 
= dictGetEntryKey(de
); 
10716         count 
+= pubsubUnsubscribeChannel(c
,channel
,notify
); 
10718     dictReleaseIterator(di
); 
10722 /* Unsubscribe from all the patterns. Return the number of patterns the 
10723  * client was subscribed from. */ 
10724 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
) { 
10729     listRewind(c
->pubsub_patterns
,&li
); 
10730     while ((ln 
= listNext(&li
)) != NULL
) { 
10731         robj 
*pattern 
= ln
->value
; 
10733         count 
+= pubsubUnsubscribePattern(c
,pattern
,notify
); 
10738 /* Publish a message */ 
10739 static int pubsubPublishMessage(robj 
*channel
, robj 
*message
) { 
10741     struct dictEntry 
*de
; 
10745     /* Send to clients listening for that channel */ 
10746     de 
= dictFind(server
.pubsub_channels
,channel
); 
10748         list 
*list 
= dictGetEntryVal(de
); 
10752         listRewind(list
,&li
); 
10753         while ((ln 
= listNext(&li
)) != NULL
) { 
10754             redisClient 
*c 
= ln
->value
; 
10756             addReply(c
,shared
.mbulk3
); 
10757             addReply(c
,shared
.messagebulk
); 
10758             addReplyBulk(c
,channel
); 
10759             addReplyBulk(c
,message
); 
10763     /* Send to clients listening to matching channels */ 
10764     if (listLength(server
.pubsub_patterns
)) { 
10765         listRewind(server
.pubsub_patterns
,&li
); 
10766         channel 
= getDecodedObject(channel
); 
10767         while ((ln 
= listNext(&li
)) != NULL
) { 
10768             pubsubPattern 
*pat 
= ln
->value
; 
10770             if (stringmatchlen((char*)pat
->pattern
->ptr
, 
10771                                 sdslen(pat
->pattern
->ptr
), 
10772                                 (char*)channel
->ptr
, 
10773                                 sdslen(channel
->ptr
),0)) { 
10774                 addReply(pat
->client
,shared
.mbulk4
); 
10775                 addReply(pat
->client
,shared
.pmessagebulk
); 
10776                 addReplyBulk(pat
->client
,pat
->pattern
); 
10777                 addReplyBulk(pat
->client
,channel
); 
10778                 addReplyBulk(pat
->client
,message
); 
10782         decrRefCount(channel
); 
10787 static void subscribeCommand(redisClient 
*c
) { 
10790     for (j 
= 1; j 
< c
->argc
; j
++) 
10791         pubsubSubscribeChannel(c
,c
->argv
[j
]); 
10794 static void unsubscribeCommand(redisClient 
*c
) { 
10795     if (c
->argc 
== 1) { 
10796         pubsubUnsubscribeAllChannels(c
,1); 
10801         for (j 
= 1; j 
< c
->argc
; j
++) 
10802             pubsubUnsubscribeChannel(c
,c
->argv
[j
],1); 
10806 static void psubscribeCommand(redisClient 
*c
) { 
10809     for (j 
= 1; j 
< c
->argc
; j
++) 
10810         pubsubSubscribePattern(c
,c
->argv
[j
]); 
10813 static void punsubscribeCommand(redisClient 
*c
) { 
10814     if (c
->argc 
== 1) { 
10815         pubsubUnsubscribeAllPatterns(c
,1); 
10820         for (j 
= 1; j 
< c
->argc
; j
++) 
10821             pubsubUnsubscribePattern(c
,c
->argv
[j
],1); 
10825 static void publishCommand(redisClient 
*c
) { 
10826     int receivers 
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]); 
10827     addReplyLongLong(c
,receivers
); 
10830 /* ===================== WATCH (CAS alike for MULTI/EXEC) =================== 
10832  * The implementation uses a per-DB hash table mapping keys to list of clients 
10833  * WATCHing those keys, so that given a key that is going to be modified 
10834  * we can mark all the associated clients as dirty. 
10836  * Also every client contains a list of WATCHed keys so that's possible to 
10837  * un-watch such keys when the client is freed or when UNWATCH is called. */ 
10839 /* In the client->watched_keys list we need to use watchedKey structures 
10840  * as in order to identify a key in Redis we need both the key name and the DB. */ 
10842 typedef struct watchedKey 
{ 
10847 /* Watch for the specified key */ 
10848 static void watchForKey(redisClient 
*c
, robj 
*key
) { 
10849     list 
*clients 
= NULL
; 
10854     /* Check if we are already watching for this key */ 
10855     listRewind(c
->watched_keys
,&li
); 
10856     while((ln 
= listNext(&li
))) { 
10857         wk 
= listNodeValue(ln
); 
10858         if (wk
->db 
== c
->db 
&& equalStringObjects(key
,wk
->key
)) 
10859             return; /* Key already watched */ 
10861     /* This key is not already watched in this DB. Let's add it */ 
10862     clients 
= dictFetchValue(c
->db
->watched_keys
,key
); 
10864         clients 
= listCreate(); 
10865         dictAdd(c
->db
->watched_keys
,key
,clients
); 
10868     listAddNodeTail(clients
,c
); 
10869     /* Add the new key to the list of keys watched by this client */ 
10870     wk 
= zmalloc(sizeof(*wk
)); 
10874     listAddNodeTail(c
->watched_keys
,wk
); 
10877 /* Unwatch all the keys watched by this client. Clearing the EXEC dirty 
10878  * flag is up to the caller. */ 
10879 static void unwatchAllKeys(redisClient 
*c
) { 
10883     if (listLength(c
->watched_keys
) == 0) return; 
10884     listRewind(c
->watched_keys
,&li
); 
10885     while((ln 
= listNext(&li
))) { 
10889         /* Lookup the watched key -> clients list and remove the client from the list */ 
10891         wk 
= listNodeValue(ln
); 
10892         clients 
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
); 
10893         assert(clients 
!= NULL
); 
10894         listDelNode(clients
,listSearchKey(clients
,c
)); 
10895         /* Kill the entry at all if this was the only client */ 
10896         if (listLength(clients
) == 0) 
10897             dictDelete(wk
->db
->watched_keys
, wk
->key
); 
10898         /* Remove this watched key from the client->watched list */ 
10899         listDelNode(c
->watched_keys
,ln
); 
10900         decrRefCount(wk
->key
); 
10905 /* "Touch" a key, so that if this key is being WATCHed by some client the 
10906  * next EXEC will fail. */ 
10907 static void touchWatchedKey(redisDb 
*db
, robj 
*key
) { 
10912     if (dictSize(db
->watched_keys
) == 0) return; 
10913     clients 
= dictFetchValue(db
->watched_keys
, key
); 
10914     if (!clients
) return; 
10916     /* Mark all the clients watching this key as REDIS_DIRTY_CAS */ 
10917     /* Check if we are already watching for this key */ 
10918     listRewind(clients
,&li
); 
10919     while((ln 
= listNext(&li
))) { 
10920         redisClient 
*c 
= listNodeValue(ln
); 
10922         c
->flags 
|= REDIS_DIRTY_CAS
; 
10926 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the 
10927  * flush but will be deleted as effect of the flushing operation should 
10928  * be touched. "dbid" is the DB that's getting the flush. -1 if it is 
10929  * a FLUSHALL operation (all the DBs flushed). */ 
10930 static void touchWatchedKeysOnFlush(int dbid
) { 
10934     /* For every client, check all the waited keys */ 
10935     listRewind(server
.clients
,&li1
); 
10936     while((ln 
= listNext(&li1
))) { 
10937         redisClient 
*c 
= listNodeValue(ln
); 
10938         listRewind(c
->watched_keys
,&li2
); 
10939         while((ln 
= listNext(&li2
))) { 
10940             watchedKey 
*wk 
= listNodeValue(ln
); 
10942             /* For every watched key matching the specified DB, if the 
10943              * key exists, mark the client as dirty, as the key will be 
10945             if (dbid 
== -1 || wk
->db
->id 
== dbid
) { 
10946                 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
) 
10947                     c
->flags 
|= REDIS_DIRTY_CAS
; 
10953 static void watchCommand(redisClient 
*c
) { 
10956     if (c
->flags 
& REDIS_MULTI
) { 
10957         addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n")); 
10960     for (j 
= 1; j 
< c
->argc
; j
++) 
10961         watchForKey(c
,c
->argv
[j
]); 
10962     addReply(c
,shared
.ok
); 
10965 static void unwatchCommand(redisClient 
*c
) { 
10967     c
->flags 
&= (~REDIS_DIRTY_CAS
); 
10968     addReply(c
,shared
.ok
); 
10971 /* ================================= Debugging ============================== */ 
10973 /* Compute the SHA1 of the 'len' bytes long string at 'ptr'. 
10974  * The SHA1 is then xored against the 20 bytes pointed by digest. 
10975  * Since xor is commutative, this operation is used in order to 
10976  * "add" digests relative to unordered elements. 
10978  * So digest(a,b,c,d) will be the same as digest(b,a,c,d) */ 
10979 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
10981     unsigned char hash
[20], *s 
= ptr
; 
10985     SHA1Update(&ctx
,s
,len
); 
10986     SHA1Final(hash
,&ctx
); 
10988     for (j 
= 0; j 
< 20; j
++) 
10989         digest
[j
] ^= hash
[j
]; 
10992 static void xorObjectDigest(unsigned char *digest
, robj 
*o
) { 
10993     o 
= getDecodedObject(o
); 
10994     xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
10998 /* This function, instead of just computing the SHA1 and xoring it 
10999  * against digest, also performs the digest of "digest" itself and 
11000  * replaces the old value with the new one. 
11002  * So the final digest will be: 
11004  * digest = SHA1(digest xor SHA1(data)) 
11006  * This function is used every time we want to preserve the order so 
11007  * that digest(a,b,c,d) will be different than digest(b,c,d,a) 
11009  * Also note that mixdigest("foo") followed by mixdigest("bar") 
11010  * will lead to a different digest compared to "fo", "obar". */ 
11012 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
11016     xorDigest(digest
,s
,len
); 
11018     SHA1Update(&ctx
,digest
,20); 
11019     SHA1Final(digest
,&ctx
); 
11022 static void mixObjectDigest(unsigned char *digest
, robj 
*o
) { 
11023     o 
= getDecodedObject(o
); 
11024     mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
11028 /* Compute the dataset digest. Since keys, sets elements, hashes elements 
11029  * are not ordered, we use a trick: every aggregate digest is the xor 
11030  * of the digests of their elements. This way the order will not change 
11031  * the result. For list instead we use a feedback entering the output digest 
11032  * as input in order to ensure that a different ordered list will result in 
11033  * a different digest. */ 
11034 static void computeDatasetDigest(unsigned char *final
) { 
11035     unsigned char digest
[20]; 
11037     dictIterator 
*di 
= NULL
; 
11042     memset(final
,0,20); /* Start with a clean result */ 
11044     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
11045         redisDb 
*db 
= server
.db
+j
; 
11047         if (dictSize(db
->dict
) == 0) continue; 
11048         di 
= dictGetIterator(db
->dict
); 
11050         /* hash the DB id, so the same dataset moved in a different 
11051          * DB will lead to a different digest */ 
11053         mixDigest(final
,&aux
,sizeof(aux
)); 
11055         /* Iterate this DB writing every entry */ 
11056         while((de 
= dictNext(di
)) != NULL
) { 
11061             memset(digest
,0,20); /* This key-val digest */ 
11062             key 
= dictGetEntryKey(de
); 
11063             keyobj 
= createStringObject(key
,sdslen(key
)); 
11065             mixDigest(digest
,key
,sdslen(key
)); 
11067             /* Make sure the key is loaded if VM is active */ 
11068             o 
= lookupKeyRead(db
,keyobj
); 
11070             aux 
= htonl(o
->type
); 
11071             mixDigest(digest
,&aux
,sizeof(aux
)); 
11072             expiretime 
= getExpire(db
,keyobj
); 
11074             /* Save the key and associated value */ 
11075             if (o
->type 
== REDIS_STRING
) { 
11076                 mixObjectDigest(digest
,o
); 
11077             } else if (o
->type 
== REDIS_LIST
) { 
11078                 listTypeIterator 
*li 
= listTypeInitIterator(o
,0,REDIS_TAIL
); 
11079                 listTypeEntry entry
; 
11080                 while(listTypeNext(li
,&entry
)) { 
11081                     robj 
*eleobj 
= listTypeGet(&entry
); 
11082                     mixObjectDigest(digest
,eleobj
); 
11083                     decrRefCount(eleobj
); 
11085                 listTypeReleaseIterator(li
); 
11086             } else if (o
->type 
== REDIS_SET
) { 
11087                 dict 
*set 
= o
->ptr
; 
11088                 dictIterator 
*di 
= dictGetIterator(set
); 
11091                 while((de 
= dictNext(di
)) != NULL
) { 
11092                     robj 
*eleobj 
= dictGetEntryKey(de
); 
11094                     xorObjectDigest(digest
,eleobj
); 
11096                 dictReleaseIterator(di
); 
11097             } else if (o
->type 
== REDIS_ZSET
) { 
11099                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
11102                 while((de 
= dictNext(di
)) != NULL
) { 
11103                     robj 
*eleobj 
= dictGetEntryKey(de
); 
11104                     double *score 
= dictGetEntryVal(de
); 
11105                     unsigned char eledigest
[20]; 
11107                     snprintf(buf
,sizeof(buf
),"%.17g",*score
); 
11108                     memset(eledigest
,0,20); 
11109                     mixObjectDigest(eledigest
,eleobj
); 
11110                     mixDigest(eledigest
,buf
,strlen(buf
)); 
11111                     xorDigest(digest
,eledigest
,20); 
11113                 dictReleaseIterator(di
); 
11114             } else if (o
->type 
== REDIS_HASH
) { 
11115                 hashTypeIterator 
*hi
; 
11118                 hi 
= hashTypeInitIterator(o
); 
11119                 while (hashTypeNext(hi
) != REDIS_ERR
) { 
11120                     unsigned char eledigest
[20]; 
11122                     memset(eledigest
,0,20); 
11123                     obj 
= hashTypeCurrent(hi
,REDIS_HASH_KEY
); 
11124                     mixObjectDigest(eledigest
,obj
); 
11126                     obj 
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
); 
11127                     mixObjectDigest(eledigest
,obj
); 
11129                     xorDigest(digest
,eledigest
,20); 
11131                 hashTypeReleaseIterator(hi
); 
11133                 redisPanic("Unknown object type"); 
11135             /* If the key has an expire, add it to the mix */ 
11136             if (expiretime 
!= -1) xorDigest(digest
,"!!expire!!",10); 
11137             /* We can finally xor the key-val digest to the final digest */ 
11138             xorDigest(final
,digest
,20); 
11139             decrRefCount(keyobj
); 
11141         dictReleaseIterator(di
); 
/* DEBUG command: a collection of introspection / testing subcommands.
 * The subcommand name in c->argv[1] is matched case-insensitively:
 *   SEGFAULT, RELOAD, LOADAOF, OBJECT <key>, SWAPIN <key>, SWAPOUT <key>,
 *   POPULATE <count>, DIGEST.
 * OBJECT, SWAPIN, SWAPOUT and POPULATE additionally require c->argc == 3,
 * DIGEST requires c->argc == 2. Any other form gets a syntax error reply. */
11145 static void debugCommand(redisClient 
*c
) { 
11146     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
          /* Deliberately write through an invalid pointer to crash. */
11147         *((char*)-1) = 'x'; 
11148     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
          /* Save the dataset to server.dbfilename and load it back from the
           * same file; either step failing replies with shared.err. */
11149         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
11150             addReply(c
,shared
.err
); 
11154         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
11155             addReply(c
,shared
.err
); 
11158         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
11159         addReply(c
,shared
.ok
); 
11160     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
          /* Load the dataset from the append only file. */
11162         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
11163             addReply(c
,shared
.err
); 
11166         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
11167         addReply(c
,shared
.ok
); 
11168     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
          /* Look the key up directly in the DB dictionary and describe the
           * stored value. */
11169         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
); 
11173             addReply(c
,shared
.nokeyerr
); 
11176         val 
= dictGetEntryVal(de
); 
          /* With VM disabled, or when the value's storage is MEMORY or
           * SWAPPING, report address, refcount, encoding name and
           * serialized length. */
11177         if (!server
.vm_enabled 
|| (val
->storage 
== REDIS_VM_MEMORY 
|| 
11178                                    val
->storage 
== REDIS_VM_SWAPPING
)) { 
              /* Use the name from the strencoding table when the encoding
               * is a valid index into it; otherwise format a placeholder. */
11182             if (val
->encoding 
< (sizeof(strencoding
)/sizeof(char*))) { 
11183                 strenc 
= strencoding
[val
->encoding
]; 
                  /* NOTE(review): the size passed is the literal 64; buf's
                   * declaration is not shown here -- confirm it is at
                   * least 64 bytes. */
11185                 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
); 
11188             addReplySds(c
,sdscatprintf(sdsempty(), 
11189                 "+Value at:%p refcount:%d " 
11190                 "encoding:%s serializedlength:%lld\r\n", 
11191                 (void*)val
, val
->refcount
, 
11192                 strenc
, (long long) rdbSavedObjectLen(val
,NULL
))); 
              /* Otherwise the entry is reinterpreted as a vmpointer and its
               * page / usedpages fields are reported. */
11194             vmpointer 
*vp 
= (vmpointer
*) val
; 
11195             addReplySds(c
,sdscatprintf(sdsempty(), 
11196                 "+Value swapped at: page %llu " 
11197                 "using %llu pages\r\n", 
11198                 (unsigned long long) vp
->page
, 
11199                 (unsigned long long) vp
->usedpages
)); 
11201     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc 
== 3) { 
          /* Perform a read lookup of the key and reply OK; the lookup result
           * is ignored. Presumably the lookup itself loads a swapped value
           * back in memory -- confirm against lookupKeyRead(). */
11202         lookupKeyRead(c
->db
,c
->argv
[2]); 
11203         addReply(c
,shared
.ok
); 
11204     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
11205         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
); 
11209         if (!server
.vm_enabled
) { 
11210             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
11214             addReply(c
,shared
.nokeyerr
); 
11217         val 
= dictGetEntryVal(de
); 
          /* Only a value whose storage is MEMORY and whose refcount is 1 is
           * swapped. On success the pointer returned by
           * vmSwapObjectBlocking() replaces the value in the dict entry. */
11219         if (val
->storage 
!= REDIS_VM_MEMORY
) { 
11220             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
11221         } else if (val
->refcount 
!= 1) { 
11222             addReplySds(c
,sdsnew("-ERR Object is shared\r\n")); 
11223         } else if ((vp 
= vmSwapObjectBlocking(val
)) != NULL
) { 
11224             dictGetEntryVal(de
) = vp
; 
11225             addReply(c
,shared
.ok
); 
11227             addReply(c
,shared
.err
); 
11229     } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc 
== 3) { 
          /* Parse the count from argv[2] into 'keys', then for every j in
           * [0, keys) build a "key:<j>" / "value:<j>" pair of string objects
           * and add it to the current DB. */
11234         if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
) 
11236         for (j 
= 0; j 
< keys
; j
++) { 
11237             snprintf(buf
,sizeof(buf
),"key:%lu",j
); 
11238             key 
= createStringObject(buf
,strlen(buf
)); 
              /* Check whether the generated key name already exists. */
11239             if (lookupKeyRead(c
->db
,key
) != NULL
) { 
11243             snprintf(buf
,sizeof(buf
),"value:%lu",j
); 
11244             val 
= createStringObject(buf
,strlen(buf
)); 
11245             dbAdd(c
->db
,key
,val
); 
11248         addReply(c
,shared
.ok
); 
11249     } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc 
== 2) { 
          /* Compute the 20 byte dataset digest and build a "+<hex>\r\n"
           * status line with two hex digits per byte. */
11250         unsigned char digest
[20]; 
11251         sds d 
= sdsnew("+"); 
11254         computeDatasetDigest(digest
); 
11255         for (j 
= 0; j 
< 20; j
++) 
11256             d 
= sdscatprintf(d
, "%02x",digest
[j
]); 
11258         d 
= sdscatlen(d
,"\r\n",2); 
          /* NOTE(review): the usage text below only lists SEGFAULT, OBJECT,
           * SWAPIN, SWAPOUT and RELOAD, while this function also handles
           * LOADAOF, POPULATE and DIGEST. */
11261         addReplySds(c
,sdsnew( 
11262             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n")); 
/* Report a failed assertion in the log.
 *
 * estr: text of the expression that was expected to be true.
 * file: source file name of the assertion.
 * line: source line number of the assertion.
 *
 * When HAVE_BACKTRACE is defined the function then writes through an
 * invalid pointer on purpose: as the logged message says, this forces a
 * SIGSEGV so that the stack trace gets printed. */
11266 static void _redisAssert(char *estr
, char *file
, int line
) { 
11267     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
11268     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
); 
11269 #ifdef HAVE_BACKTRACE 
11270     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
      /* Intentional invalid write used to trigger the crash. */
11271     *((char*)-1) = 'x'; 
/* Report an unrecoverable condition in the log.
 *
 * msg:  description of the failure.
 * file: source file name where the panic was raised.
 * line: source line number where the panic was raised.
 *
 * When HAVE_BACKTRACE is defined the function then writes through an
 * invalid pointer on purpose: as the logged message says, this forces a
 * SIGSEGV so that the stack trace gets printed. */
11275 static void _redisPanic(char *msg
, char *file
, int line
) { 
11276     redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue"); 
11277     redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
); 
11278 #ifdef HAVE_BACKTRACE 
11279     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
      /* Intentional invalid write used to trigger the crash. */
11280     *((char*)-1) = 'x'; 
11284 /* =================================== Main! ================================ */ 
/* Read the Linux overcommit setting from /proc/sys/vm/overcommit_memory.
 * Returns -1 when the file can't be opened. The content is read as a single
 * line of at most 63 characters using fgets(). */
11287 int linuxOvercommitMemoryValue(void) { 
11288     FILE *fp 
= fopen("/proc/sys/vm/overcommit_memory","r"); 
11291     if (!fp
) return -1; 
11292     if (fgets(buf
,64,fp
) == NULL
) { 
/* Log a warning, including the sysctl setting that fixes it, when
 * linuxOvercommitMemoryValue() reports that overcommit_memory is 0.
 * Any other value (including the -1 error value) logs nothing. */
11301 void linuxOvercommitMemoryWarning(void) { 
11302     if (linuxOvercommitMemoryValue() == 0) { 
11303         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
11306 #endif /* __linux__ */ 
/* Turn the process into a daemon: fork and let the parent exit, start a
 * new session, redirect stdin/stdout/stderr to /dev/null and write the
 * process id, followed by a newline, into server.pidfile. */
11308 static void daemonize(void) { 
      /* NOTE(review): a failed fork() returns -1, which is also != 0, so the
       * process exits with status 0 in that case as well. */
11312     if (fork() != 0) exit(0); /* parent exits */ 
11313     setsid(); /* create a new session */ 
11315     /* Every output goes to /dev/null. If Redis is daemonized but 
11316      * the 'logfile' is set to 'stdout' in the configuration file 
11317      * it will not log at all. */ 
11318     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
11319         dup2(fd
, STDIN_FILENO
); 
11320         dup2(fd
, STDOUT_FILENO
); 
11321         dup2(fd
, STDERR_FILENO
); 
          /* Close the original descriptor unless it is itself one of the
           * three standard descriptors. */
11322         if (fd 
> STDERR_FILENO
) close(fd
); 
11324     /* Try to write the pid file */ 
11325     fp 
= fopen(server
.pidfile
,"w"); 
11327         fprintf(fp
,"%d\n",getpid()); 
/* Print the server version to stdout, followed by the git SHA1 and a
 * 0/1 flag that is 1 when atoi(REDIS_GIT_DIRTY) is greater than zero. */
11332 static void version() { 
11333     printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
, 
11334         REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0); 
/* Print the two supported command line forms to stderr: a config file
 * path, or "-" to read the configuration from stdin. */
11338 static void usage() { 
11339     fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n"); 
11340     fprintf(stderr
,"       ./redis-server - (read config from stdin)\n"); 
/* Server entry point.
 *
 * Sets up the default configuration and sorts the command table, then
 * handles argv[1]: "-v" / "--version" calls version(), "--help" calls
 * usage(), anything else is passed to loadServerConfig() after the save
 * parameters are reset. Afterwards it daemonizes if server.daemonize is
 * set, loads the dataset (from the append only file when server.appendonly
 * is set, otherwise from the RDB file), logs the time taken on success and
 * installs beforeSleep on the event loop. */
11344 int main(int argc
, char **argv
) { 
11347     initServerConfig(); 
11348     sortCommandTable(); 
11350         if (strcmp(argv
[1], "-v") == 0 || 
11351             strcmp(argv
[1], "--version") == 0) version(); 
11352         if (strcmp(argv
[1], "--help") == 0) usage(); 
11353         resetServerSaveParams(); 
11354         loadServerConfig(argv
[1]); 
11355     } else if ((argc 
> 2)) { 
11358         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
11360     if (server
.daemonize
) daemonize(); 
11362     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
11364     linuxOvercommitMemoryWarning(); 
      /* Remember when loading started so the elapsed seconds can be logged.
       * NOTE(review): the "%ld" conversions below are given
       * time(NULL)-start; start's declaration is not shown here -- confirm
       * the expression has type long on every target. */
11366     start 
= time(NULL
); 
11367     if (server
.appendonly
) { 
11368         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
11369             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
11371         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
11372             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
11374     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
11375     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
11377     aeDeleteEventLoop(server
.el
); 
11381 /* ============================= Backtrace support ========================= */ 
11383 #ifdef HAVE_BACKTRACE 
/* Forward declaration: the definition appears further down in this file,
 * after staticsymbols.h is included, while segvHandler() needs to call it
 * earlier. */
11384 static char *findFuncName(void *pointer
, unsigned long *offset
); 
/* Return, as a void pointer, the EIP/RIP/IP register field stored in the
 * machine context of 'uc'. The field to read is selected at compile time:
 * FreeBSD, dietlibc, Mac OS X (before and from 10.6, 32 and 64 bit),
 * Linux x86 / x86_64 and Linux IA64 each have their own branch. */
11386 static void *getMcontextEip(ucontext_t 
*uc
) { 
11387 #if defined(__FreeBSD__) 
11388     return (void*) uc
->uc_mcontext
.mc_eip
; 
11389 #elif defined(__dietlibc__) 
11390     return (void*) uc
->uc_mcontext
.eip
; 
11391 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
11393     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
11395     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
11397 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
11398   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
11399     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
11401     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
11403 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
11404     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
11405 #elif defined(__ia64__) /* Linux IA64 */ 
11406     return (void*) uc
->uc_mcontext
.sc_ip
; 
/* Signal handler installed with SA_SIGINFO by setupSigSegvAction().
 *
 * sig:    the signal number, included in the first log line.
 * info:   unused.
 * secret: the third sigaction handler argument, used as an ucontext_t
 *         pointer in order to get the address saved in the machine context.
 *
 * Logs the Redis version and signal, the INFO string, and then one line for
 * every captured stack frame. */
11412 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
11414     char **messages 
= NULL
; 
11415     int i
, trace_size 
= 0; 
11416     unsigned long offset
=0; 
11417     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
11419     REDIS_NOTUSED(info
); 
11421     redisLog(REDIS_WARNING
, 
11422         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
11423     infostring 
= genRedisInfoString(); 
11424     redisLog(REDIS_WARNING
, "%s",infostring
); 
11425     /* It's not safe to sdsfree() the returned string under memory 
11426      * corruption conditions. Let it leak as we are going to abort */ 
      /* Capture at most 100 return addresses. */
11428     trace_size 
= backtrace(trace
, 100); 
11429     /* overwrite sigaction with caller's address */ 
11430     if (getMcontextEip(uc
) != NULL
) { 
11431         trace
[1] = getMcontextEip(uc
); 
      /* NOTE(review): no NULL check of the backtrace_symbols() result is
       * visible before messages[i] is used below. */
11433     messages 
= backtrace_symbols(trace
, trace_size
); 
      /* Frame 0 is skipped. For every other frame try to resolve the name
       * with findFuncName(); the line from backtrace_symbols() is logged
       * as-is when no name was found, or when the number following '+' in
       * that line is smaller than the offset computed by findFuncName(). */
11435     for (i
=1; i
<trace_size
; ++i
) { 
11436         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
11438         p 
= strchr(messages
[i
],'+'); 
11439         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
11440             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
11442             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
11445     /* free(messages); Don't call free() with possibly corrupted memory. */ 
/* SIGTERM handler: logs that the signal was received and sets
 * server.shutdown_asap to 1. The handler itself only raises the flag;
 * 'sig' is unused. */
11449 static void sigtermHandler(int sig
) { 
11450     REDIS_NOTUSED(sig
); 
11452     redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down..."); 
11453     server
.shutdown_asap 
= 1; 
/* Install the signal handlers: segvHandler() (three-argument SA_SIGINFO
 * form) for SIGSEGV, SIGBUS, SIGFPE and SIGILL, and sigtermHandler()
 * (one-argument form) for SIGTERM. All of them are registered with an
 * empty signal mask and the SA_NODEFER, SA_ONSTACK and SA_RESETHAND
 * flags. */
11456 static void setupSigSegvAction(void) { 
11457     struct sigaction act
; 
11459     sigemptyset (&act
.sa_mask
); 
11460     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
11461      * is used. Otherwise, sa_handler is used */ 
11462     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
11463     act
.sa_sigaction 
= segvHandler
; 
11464     sigaction (SIGSEGV
, &act
, NULL
); 
11465     sigaction (SIGBUS
, &act
, NULL
); 
11466     sigaction (SIGFPE
, &act
, NULL
); 
11467     sigaction (SIGILL
, &act
, NULL
); 
      /* NOTE(review): SIGBUS was already registered above with the same
       * settings; this second call is redundant. */
11468     sigaction (SIGBUS
, &act
, NULL
); 
      /* SIGTERM uses the plain handler form, so SA_SIGINFO is dropped. */
11470     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND
; 
11471     act
.sa_handler 
= sigtermHandler
; 
11472     sigaction (SIGTERM
, &act
, NULL
); 
11476 #include "staticsymbols.h" 
11477 /* This function tries to convert a pointer into a function name. It's used in 
11478  * order to provide a backtrace under segmentation fault that's able to 
11479  * display functions declared as static (otherwise the backtrace is useless). 
       *
       * The lookup scans symsTable (terminated by an entry whose pointer is
       * zero) and only considers entries whose address is not greater than
       * 'pointer'. Returns the name of the selected entry, or NULL when no
       * entry was selected. */ 
11480 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
11482     unsigned long off
, minoff 
= 0; 
11484     /* Try to match against the Symbol with the smallest offset */ 
11485     for (i
=0; symsTable
[i
].pointer
; i
++) { 
11486         unsigned long lp 
= (unsigned long) pointer
; 
          /* Skip the all-ones address and symbols located after 'pointer'. */
11488         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
11489             off
=lp
-symsTable
[i
].pointer
; 
              /* First candidate, or a candidate closer to 'pointer' than
               * the best offset seen so far. */
11490             if (ret 
< 0 || off 
< minoff
) { 
11496     if (ret 
== -1) return NULL
; 
11498     return symsTable
[ret
].name
; 
11500 #else /* HAVE_BACKTRACE */ 
/* Variant compiled when HAVE_BACKTRACE is not defined, so that callers can
 * invoke setupSigSegvAction() regardless of backtrace support. */
11501 static void setupSigSegvAction(void) { 
11503 #endif /* HAVE_BACKTRACE */