2  * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 
   5  * Redistribution and use in source and binary forms, with or without 
   6  * modification, are permitted provided that the following conditions are met: 
   8  *   * Redistributions of source code must retain the above copyright notice, 
   9  *     this list of conditions and the following disclaimer. 
  10  *   * Redistributions in binary form must reproduce the above copyright 
  11  *     notice, this list of conditions and the following disclaimer in the 
  12  *     documentation and/or other materials provided with the distribution. 
  13  *   * Neither the name of Redis nor the names of its contributors may be used 
  14  *     to endorse or promote products derived from this software without 
  15  *     specific prior written permission. 
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  20  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  27  * POSSIBILITY OF SUCH DAMAGE. 
  30 #define REDIS_VERSION "2.1.1" 
  45 #endif /* HAVE_BACKTRACE */ 
  53 #include <arpa/inet.h> 
  57 #include <sys/resource.h> 
  65 #include "solarisfixes.h" 
  69 #include "ae.h"     /* Event driven programming library */ 
  70 #include "sds.h"    /* Dynamic safe strings */ 
  71 #include "anet.h"   /* Networking the easy way */ 
  72 #include "dict.h"   /* Hash tables */ 
  73 #include "adlist.h" /* Linked lists */ 
  74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */ 
  75 #include "lzf.h"    /* LZF compression library */ 
  76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */ 
  77 #include "zipmap.h" /* Compact dictionary-alike data structure */ 
  78 #include "ziplist.h" /* Compact list data structure */ 
  79 #include "sha1.h"   /* SHA1 is used for DEBUG DIGEST */ 
  80 #include "release.h" /* Release and/or git repository information */ 
  86 /* Static server configuration */ 
  87 #define REDIS_SERVERPORT        6379    /* TCP port */ 
  88 #define REDIS_MAXIDLETIME       (60*5)  /* default client timeout */ 
  89 #define REDIS_IOBUF_LEN         1024 
  90 #define REDIS_LOADBUF_LEN       1024 
  91 #define REDIS_STATIC_ARGS       8 
  92 #define REDIS_DEFAULT_DBNUM     16 
  93 #define REDIS_CONFIGLINE_MAX    1024 
  94 #define REDIS_OBJFREELIST_MAX   1000000 /* Max number of objects to cache */ 
  95 #define REDIS_MAX_SYNC_TIME     60      /* Slave can't take more to sync */ 
  96 #define REDIS_EXPIRELOOKUPS_PER_CRON    10 /* lookup 10 expires per loop */ 
  97 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) 
  98 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */ 
 100 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */ 
 101 #define REDIS_WRITEV_THRESHOLD      3 
 102 /* Max number of iovecs used for each writev call */ 
 103 #define REDIS_WRITEV_IOVEC_COUNT    256 
 105 /* Hash table parameters */ 
 106 #define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */ 
 109 #define REDIS_CMD_BULK          1       /* Bulk write command */ 
 110 #define REDIS_CMD_INLINE        2       /* Inline command */ 
 111 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with 
 112    this flag will return an error when the 'maxmemory' option is set in the 
 113    config file and the server is using more than maxmemory bytes of memory. 
 114    In short these commands are denied on low memory conditions. */ 
 115 #define REDIS_CMD_DENYOOM       4 
 116 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */ 
 119 #define REDIS_STRING 0 
 124 #define REDIS_VMPOINTER 8 
 126 /* Objects encoding. Some kind of objects like Strings and Hashes can be 
 127  * internally represented in multiple ways. The 'encoding' field of the object 
 128  * is set to one of these values for this object. */ 
 129 #define REDIS_ENCODING_RAW 0     /* Raw representation */ 
 130 #define REDIS_ENCODING_INT 1     /* Encoded as integer */ 
 131 #define REDIS_ENCODING_HT 2      /* Encoded as hash table */ 
 132 #define REDIS_ENCODING_ZIPMAP 3  /* Encoded as zipmap */ 
 133 #define REDIS_ENCODING_LIST 4    /* Encoded as list (presumably a regular linked list; TODO confirm) */ 
 134 #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ 
 136 static char* strencoding
[] = { 
 137     "raw", "int", "hashtable", "zipmap", "list", "ziplist" 
 140 /* Object types only used for dumping to disk */ 
 141 #define REDIS_EXPIRETIME 253 
 142 #define REDIS_SELECTDB 254 
 143 #define REDIS_EOF 255 
 145 /* Defines related to the dump file format. To store 32 bits lengths for short 
 146  * keys requires a lot of space, so we check the most significant 2 bits of 
 147  * the first byte to interpret the length: 
 149  * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte 
 150  * 01|000000 00000000 =>  01, the len is 14 bits, 6 bits + 8 bits of next byte 
 151  * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow 
 152  * 11|000000 this means: specially encoded object will follow. The six bits 
 153  *           number specifies the kind of object that follows. 
 154  *           See the REDIS_RDB_ENC_* defines. 
 156  * Lengths up to 63 are stored using a single byte, most DB keys, and many 
 157  * values, will fit inside. */ 
 158 #define REDIS_RDB_6BITLEN 0 
 159 #define REDIS_RDB_14BITLEN 1 
 160 #define REDIS_RDB_32BITLEN 2 
 161 #define REDIS_RDB_ENCVAL 3 
 162 #define REDIS_RDB_LENERR UINT_MAX 
 164 /* When a length of a string object stored on disk has the first two bits 
 165  * set, the remaining six bits specify a special encoding for the object 
 166  * according to the following defines: */ 
 167 #define REDIS_RDB_ENC_INT8 0        /* 8 bit signed integer */ 
 168 #define REDIS_RDB_ENC_INT16 1       /* 16 bit signed integer */ 
 169 #define REDIS_RDB_ENC_INT32 2       /* 32 bit signed integer */ 
 170 #define REDIS_RDB_ENC_LZF 3         /* string compressed with LZF */ 
 172 /* Virtual memory object->where field. */ 
 173 #define REDIS_VM_MEMORY 0       /* The object is on memory */ 
 174 #define REDIS_VM_SWAPPED 1      /* The object is on disk */ 
 175 #define REDIS_VM_SWAPPING 2     /* Redis is swapping this object on disk */ 
 176 #define REDIS_VM_LOADING 3      /* Redis is loading this object from disk */ 
 178 /* Virtual memory static configuration stuff. 
 179  * Check vmFindContiguousPages() to know more about these magic numbers. */ 
 180 #define REDIS_VM_MAX_NEAR_PAGES 65536 
 181 #define REDIS_VM_MAX_RANDOM_JUMP 4096 
 182 #define REDIS_VM_MAX_THREADS 32 
 183 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) 
 184 /* The following is the *percentage* of completed I/O jobs to process when the 
 185  * handler is called. While Virtual Memory I/O operations are performed by 
 186  * threads, these operations must be processed by the main thread when completed 
 187  * in order to take effect. */ 
 188 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 
 191 #define REDIS_SLAVE 1       /* This client is a slave server */ 
 192 #define REDIS_MASTER 2      /* This client is a master server */ 
 193 #define REDIS_MONITOR 4     /* This client is a slave monitor, see MONITOR */ 
 194 #define REDIS_MULTI 8       /* This client is in a MULTI context */ 
 195 #define REDIS_BLOCKED 16    /* The client is waiting in a blocking operation */ 
 196 #define REDIS_IO_WAIT 32    /* The client is waiting for Virtual Memory I/O */ 
 197 #define REDIS_DIRTY_CAS 64  /* Watched keys modified. EXEC will fail. */ 
 199 /* Slave replication state - slave side */ 
 200 #define REDIS_REPL_NONE 0   /* No active replication */ 
 201 #define REDIS_REPL_CONNECT 1    /* Must connect to master */ 
 202 #define REDIS_REPL_CONNECTED 2  /* Connected to master */ 
 204 /* Slave replication state - from the point of view of master 
 205  * Note that in SEND_BULK and ONLINE state the slave receives new updates 
 206  * in its output queue. In the WAIT_BGSAVE state instead the server is waiting 
 207  * to start the next background saving in order to send updates to it. */ 
 208 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */ 
 209 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */ 
 210 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */ 
 211 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */ 
 213 /* List related stuff */ 
 217 /* Sort operations */ 
 218 #define REDIS_SORT_GET 0 
 219 #define REDIS_SORT_ASC 1 
 220 #define REDIS_SORT_DESC 2 
 221 #define REDIS_SORTKEY_MAX 1024 
 224 #define REDIS_DEBUG 0 
 225 #define REDIS_VERBOSE 1 
 226 #define REDIS_NOTICE 2 
 227 #define REDIS_WARNING 3 
 229 /* Anti-warning macro... */ 
 230 #define REDIS_NOTUSED(V) ((void) V) 
 232 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */ 
 233 #define ZSKIPLIST_P 0.25      /* Skiplist P = 1/4 */ 
 235 /* Append only defines */ 
 236 #define APPENDFSYNC_NO 0 
 237 #define APPENDFSYNC_ALWAYS 1 
 238 #define APPENDFSYNC_EVERYSEC 2 
 240 /* Zip structure related defaults */ 
 241 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64 
 242 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512 
 243 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024 
 244 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32 
 246 /* We can print the stacktrace, so our assert is defined this way: */ 
/* redisAssert(_e): when _e is false, calls _redisAssert() with the stringified
 * expression, __FILE__ and __LINE__, and then _exit(1); when _e is true the
 * whole expression evaluates to (void)0. */
 247 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1))) 
/* redisPanic(_e): stringifies its argument, passes it to _redisPanic() together
 * with __FILE__ and __LINE__, and then calls _exit(1).
 * NOTE(review): unlike redisAssert, this expansion is not wrapped in
 * parentheses, so the comma expression is exposed to the surrounding context. */
 248 #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1) 
/* Reporting helpers used by the two macros above (declared here only). */
 249 static void _redisAssert(char *estr
, char *file
, int line
); 
 250 static void _redisPanic(char *msg
, char *file
, int line
); 
 252 /*================================= Data types ============================== */ 
 254 /* A redis object, that is a type able to hold a string / list / set */ 
 256 /* The actual Redis Object */ 
 257 typedef struct redisObject 
{ 
 259     unsigned storage
:2;     /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */ 
 261     unsigned lru
:22;        /* lru time (relative to server.lruclock) */ 
 264     /* VM fields, these are only allocated if VM is active, otherwise the 
 265      * object allocation function will just allocate 
 266      * sizeof(redisObject) minus sizeof(redisObjectVM), so using 
 267      * Redis without VM active will not have any overhead. */ 
 270 /* The VM pointer structure - identifies an object in the swap file. 
 272  * This object is stored in place of the value 
 273  * object in the main key->value hash table representing a database. 
 274  * Note that the first fields (type, storage) are the same as the redisObject 
 275  * structure so that vmPointer structures can be accessed even when cast 
 276  * as redisObject structures. 
 278  * This is useful as we don't know if a value object is on disk or not, but we 
 279  * are always able to read obj->storage to check this. For vmPointer 
 280  * structures "type" is set to REDIS_VMPOINTER (even if without this field 
 281  * it is still possible to check the kind of object from the value of 'storage').*/ 
 282 typedef struct vmPointer 
{ 
 284     unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
 286     unsigned int vtype
; /* type of the object stored in the swap file */ 
 287     off_t page
;         /* the page at which the object is stored on disk */ 
 288     off_t usedpages
;    /* number of pages used on disk */ 
 291 /* Macro used to initialize a Redis object allocated on the stack. 
 292  * Note that this macro is kept near the structure definition to make sure 
 293  * we'll update it when the structure is changed, to avoid bugs like 
 294  * bug #85 introduced exactly in this way. */ 
 295 #define initStaticStringObject(_var,_ptr) do { \ 
 297     _var.type = REDIS_STRING; \ 
 298     _var.encoding = REDIS_ENCODING_RAW; \ 
 300     _var.storage = REDIS_VM_MEMORY; \ 
 303 typedef struct redisDb 
{ 
 304     dict 
*dict
;                 /* The keyspace for this DB */ 
 305     dict 
*expires
;              /* Timeout of keys with a timeout set */ 
 306     dict 
*blocking_keys
;        /* Keys with clients waiting for data (BLPOP) */ 
 307     dict 
*io_keys
;              /* Keys with clients waiting for VM I/O */ 
 308     dict 
*watched_keys
;         /* WATCHED keys for MULTI/EXEC CAS */ 
 312 /* Client MULTI/EXEC state */ 
 313 typedef struct multiCmd 
{ 
 316     struct redisCommand 
*cmd
; 
 319 typedef struct multiState 
{ 
 320     multiCmd 
*commands
;     /* Array of MULTI commands */ 
 321     int count
;              /* Total number of MULTI commands */ 
 324 /* With multiplexing we need to keep per-client state. 
 325  * Clients are kept in a linked list. */ 
 326 typedef struct redisClient 
{ 
 331     robj 
**argv
, **mbargv
; 
 333     int bulklen
;            /* bulk read len. -1 if not in bulk read mode */ 
 334     int multibulk
;          /* multi bulk command format active */ 
 337     time_t lastinteraction
; /* time of the last interaction, used for timeout */ 
 338     int flags
;              /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */ 
 339     int slaveseldb
;         /* slave selected db, if this client is a slave */ 
 340     int authenticated
;      /* when requirepass is non-NULL */ 
 341     int replstate
;          /* replication state if this is a slave */ 
 342     int repldbfd
;           /* replication DB file descriptor */ 
 343     long repldboff
;         /* replication DB file offset */ 
 344     off_t repldbsize
;       /* replication DB file size */ 
 345     multiState mstate
;      /* MULTI/EXEC state */ 
 346     robj 
**blocking_keys
;   /* The key we are waiting to terminate a blocking 
 347                              * operation such as BLPOP. Otherwise NULL. */ 
 348     int blocking_keys_num
;  /* Number of blocking keys */ 
 349     time_t blockingto
;      /* Blocking operation timeout. If UNIX current time 
 350                              * is >= blockingto then the operation timed out. */ 
 351     list 
*io_keys
;          /* Keys this client is waiting to be loaded from the 
 352                              * swap file in order to continue. */ 
 353     list 
*watched_keys
;     /* Keys WATCHED for MULTI/EXEC CAS */ 
 354     dict 
*pubsub_channels
;  /* channels a client is interested in (SUBSCRIBE) */ 
 355     list 
*pubsub_patterns
;  /* patterns a client is interested in (SUBSCRIBE) */ 
 363 /* Global server state structure */ 
 368     long long dirty
;            /* changes to DB from the last save */ 
 370     list 
*slaves
, *monitors
; 
 371     char neterr
[ANET_ERR_LEN
]; 
 373     int cronloops
;              /* number of times the cron function run */ 
 374     list 
*objfreelist
;          /* A list of freed objects to avoid malloc() */ 
 375     time_t lastsave
;            /* Unix time of the last save that succeeded */ 
 376     /* Fields used only for stats */ 
 377     time_t stat_starttime
;         /* server start time */ 
 378     long long stat_numcommands
;    /* number of processed commands */ 
 379     long long stat_numconnections
; /* number of connections received */ 
 380     long long stat_expiredkeys
;   /* number of expired keys */ 
 389     int no_appendfsync_on_rewrite
; 
 395     pid_t bgsavechildpid
; 
 396     pid_t bgrewritechildpid
; 
 397     sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */ 
 398     sds aofbuf
;       /* AOF buffer, written before entering the event loop */ 
 399     struct saveparam 
*saveparams
; 
 404     char *appendfilename
; 
 408     /* Replication related */ 
 413     redisClient 
*master
;    /* client that is master for this slave */ 
 415     unsigned int maxclients
; 
 416     unsigned long long maxmemory
; 
 417     unsigned int blpop_blocked_clients
; 
 418     unsigned int vm_blocked_clients
; 
 419     /* Sort parameters - qsort_r() is only available under BSD so we 
 420      * have to keep this state global, in order to pass it to sortCompare() */ 
 424     /* Virtual memory configuration */ 
 429     unsigned long long vm_max_memory
; 
 430     /* Zip structure config */ 
 431     size_t hash_max_zipmap_entries
; 
 432     size_t hash_max_zipmap_value
; 
 433     size_t list_max_ziplist_entries
; 
 434     size_t list_max_ziplist_value
; 
 435     /* Virtual memory state */ 
 438     off_t vm_next_page
; /* Next probably empty page */ 
 439     off_t vm_near_pages
; /* Number of pages allocated sequentially */ 
 440     unsigned char *vm_bitmap
; /* Bitmap of free/used pages */ 
 441     time_t unixtime
;    /* Unix time sampled every second. */ 
 442     /* Virtual memory I/O threads stuff */ 
 443     /* An I/O thread processes an element taken from the io_newjobs queue and 
 444      * puts the result of the operation in the io_processed list. While the 
 445      * job is being processed, it's put on the io_processing queue. */ 
 446     list 
*io_newjobs
; /* List of VM I/O jobs yet to be processed */ 
 447     list 
*io_processing
; /* List of VM I/O jobs being processed */ 
 448     list 
*io_processed
; /* List of VM I/O jobs already processed */ 
 449     list 
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */ 
 450     pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */ 
 451     pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */ 
 452     pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */ 
 453     pthread_attr_t io_threads_attr
; /* attributes for threads creation */ 
 454     int io_active_threads
; /* Number of running I/O threads */ 
 455     int vm_max_threads
; /* Max number of I/O threads running at the same time */ 
 456     /* Our main thread is blocked on the event loop, looking for sockets ready 
 457      * to be read or written, so when a threaded I/O operation is ready to be 
 458      * processed by the main thread, the I/O thread will use a unix pipe to 
 459      * awake the main thread. The following are the two pipe FDs. */ 
 460     int io_ready_pipe_read
; 
 461     int io_ready_pipe_write
; 
 462     /* Virtual memory stats */ 
 463     unsigned long long vm_stats_used_pages
; 
 464     unsigned long long vm_stats_swapped_objects
; 
 465     unsigned long long vm_stats_swapouts
; 
 466     unsigned long long vm_stats_swapins
; 
 468     dict 
*pubsub_channels
; /* Map channels to list of subscribed clients */ 
 469     list 
*pubsub_patterns
; /* A list of pubsub_patterns */ 
 472     unsigned lruclock
:22;        /* clock incrementing every minute, for LRU */ 
 473     unsigned lruclock_padding
:10; 
 476 typedef struct pubsubPattern 
{ 
 481 typedef void redisCommandProc(redisClient 
*c
); 
 482 typedef void redisVmPreloadProc(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 483 struct redisCommand 
{ 
 485     redisCommandProc 
*proc
; 
 488     /* Use a function to determine which keys need to be loaded 
 489      * in the background prior to executing this command. Takes precedence 
 490      * over vm_firstkey and others, ignored when NULL */ 
 491     redisVmPreloadProc 
*vm_preload_proc
; 
 492     /* What keys should be loaded in background when calling this command? */ 
 493     int vm_firstkey
; /* The first argument that's a key (0 = no keys) */ 
 494     int vm_lastkey
;  /* The last argument that's a key */ 
 495     int vm_keystep
;  /* The step between first and last key */ 
 498 struct redisFunctionSym 
{ 
 500     unsigned long pointer
; 
 503 typedef struct _redisSortObject 
{ 
 511 typedef struct _redisSortOperation 
{ 
 514 } redisSortOperation
; 
 516 /* ZSETs use a specialized version of Skiplists */ 
 518 typedef struct zskiplistNode 
{ 
 519     struct zskiplistNode 
**forward
; 
 520     struct zskiplistNode 
*backward
; 
 526 typedef struct zskiplist 
{ 
 527     struct zskiplistNode 
*header
, *tail
; 
 528     unsigned long length
; 
 532 typedef struct zset 
{ 
 537 /* Our shared "common" objects */ 
 539 #define REDIS_SHARED_INTEGERS 10000 
 540 struct sharedObjectsStruct 
{ 
 541     robj 
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
, 
 542     *colon
, *nullbulk
, *nullmultibulk
, *queued
, 
 543     *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
, 
 544     *outofrangeerr
, *plus
, 
 545     *select0
, *select1
, *select2
, *select3
, *select4
, 
 546     *select5
, *select6
, *select7
, *select8
, *select9
, 
 547     *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
, 
 548     *mbulk4
, *psubscribebulk
, *punsubscribebulk
, 
 549     *integers
[REDIS_SHARED_INTEGERS
]; 
 552 /* Global vars that are actually used as constants. The following double 
 553  * values are used for double on-disk serialization, and are initialized 
 554  * at runtime to avoid strange compiler optimizations. */ 
 556 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
; 
 558 /* VM threaded I/O request message */ 
 559 #define REDIS_IOJOB_LOAD 0          /* Load from disk to memory */ 
 560 #define REDIS_IOJOB_PREPARE_SWAP 1  /* Compute needed pages */ 
 561 #define REDIS_IOJOB_DO_SWAP 2       /* Swap from memory to disk */ 
 562 typedef struct iojob 
{ 
 563     int type
;   /* Request type, REDIS_IOJOB_* */ 
 564     redisDb 
*db
;/* Redis database */ 
 565     robj 
*key
;  /* This I/O request is about swapping this key */ 
 566     robj 
*id
;   /* Unique identifier of this job: 
 567                    this is the object to swap for REDIS_IOJOB_*_SWAP, or the 
 568                    vmpointer object for REDIS_IOJOB_LOAD. */ 
 569     robj 
*val
;  /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this 
 570                  * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */ 
 571     off_t page
; /* Swap page where to read/write the object */ 
 572     off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */ 
 573     int canceled
; /* True if this command was canceled by blocking side of VM */ 
 574     pthread_t thread
; /* ID of the thread processing this entry */ 
 577 /*================================ Prototypes =============================== */ 
 579 static void freeStringObject(robj 
*o
); 
 580 static void freeListObject(robj 
*o
); 
 581 static void freeSetObject(robj 
*o
); 
 582 static void decrRefCount(void *o
); 
 583 static robj 
*createObject(int type
, void *ptr
); 
 584 static void freeClient(redisClient 
*c
); 
 585 static int rdbLoad(char *filename
); 
 586 static void addReply(redisClient 
*c
, robj 
*obj
); 
 587 static void addReplySds(redisClient 
*c
, sds s
); 
 588 static void incrRefCount(robj 
*o
); 
 589 static int rdbSaveBackground(char *filename
); 
 590 static robj 
*createStringObject(char *ptr
, size_t len
); 
 591 static robj 
*dupStringObject(robj 
*o
); 
 592 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
); 
 593 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
); 
 594 static void flushAppendOnlyFile(void); 
 595 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
); 
 596 static int syncWithMaster(void); 
 597 static robj 
*tryObjectEncoding(robj 
*o
); 
 598 static robj 
*getDecodedObject(robj 
*o
); 
 599 static int removeExpire(redisDb 
*db
, robj 
*key
); 
 600 static int expireIfNeeded(redisDb 
*db
, robj 
*key
); 
 601 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
); 
 602 static int dbDelete(redisDb 
*db
, robj 
*key
); 
 603 static time_t getExpire(redisDb 
*db
, robj 
*key
); 
 604 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
); 
 605 static void updateSlavesWaitingBgsave(int bgsaveerr
); 
 606 static void freeMemoryIfNeeded(void); 
 607 static int processCommand(redisClient 
*c
); 
 608 static void setupSigSegvAction(void); 
 609 static void rdbRemoveTempFile(pid_t childpid
); 
 610 static void aofRemoveTempFile(pid_t childpid
); 
 611 static size_t stringObjectLen(robj 
*o
); 
 612 static void processInputBuffer(redisClient 
*c
); 
 613 static zskiplist 
*zslCreate(void); 
 614 static void zslFree(zskiplist 
*zsl
); 
 615 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
); 
 616 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 617 static void initClientMultiState(redisClient 
*c
); 
 618 static void freeClientMultiState(redisClient 
*c
); 
 619 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
); 
 620 static void unblockClientWaitingData(redisClient 
*c
); 
 621 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
); 
 622 static void vmInit(void); 
 623 static void vmMarkPagesFree(off_t page
, off_t count
); 
 624 static robj 
*vmLoadObject(robj 
*o
); 
 625 static robj 
*vmPreviewObject(robj 
*o
); 
 626 static int vmSwapOneObjectBlocking(void); 
 627 static int vmSwapOneObjectThreaded(void); 
 628 static int vmCanSwapOut(void); 
 629 static int tryFreeOneObjectFromFreelist(void); 
 630 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 631 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 632 static void vmCancelThreadedIOJob(robj 
*o
); 
 633 static void lockThreadedIO(void); 
 634 static void unlockThreadedIO(void); 
 635 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
); 
 636 static void freeIOJob(iojob 
*j
); 
 637 static void queueIOJob(iojob 
*j
); 
 638 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
); 
 639 static robj 
*vmReadObjectFromSwap(off_t page
, int type
); 
 640 static void waitEmptyIOJobsQueue(void); 
 641 static void vmReopenSwapFile(void); 
 642 static int vmFreePage(off_t page
); 
 643 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 644 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
); 
 645 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
); 
 646 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
); 
 647 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
); 
 648 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
); 
 649 static struct redisCommand 
*lookupCommand(char *name
); 
 650 static void call(redisClient 
*c
, struct redisCommand 
*cmd
); 
 651 static void resetClient(redisClient 
*c
); 
 652 static void convertToRealHash(robj 
*o
); 
 653 static void listTypeConvert(robj 
*o
, int enc
); 
 654 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
); 
 655 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
); 
 656 static void freePubsubPattern(void *p
); 
 657 static int listMatchPubsubPattern(void *a
, void *b
); 
 658 static int compareStringObjects(robj 
*a
, robj 
*b
); 
 659 static int equalStringObjects(robj 
*a
, robj 
*b
); 
 661 static int rewriteAppendOnlyFileBackground(void); 
 662 static vmpointer 
*vmSwapObjectBlocking(robj 
*val
); 
 663 static int prepareForShutdown(void); /* explicit (void), like the other no-argument prototypes: an empty () would leave the parameter list unspecified */ 
 664 static void touchWatchedKey(redisDb 
*db
, robj 
*key
); 
 665 static void touchWatchedKeysOnFlush(int dbid
); 
 666 static void unwatchAllKeys(redisClient 
*c
); 
 668 static void authCommand(redisClient 
*c
); 
 669 static void pingCommand(redisClient 
*c
); 
 670 static void echoCommand(redisClient 
*c
); 
 671 static void setCommand(redisClient 
*c
); 
 672 static void setnxCommand(redisClient 
*c
); 
 673 static void setexCommand(redisClient 
*c
); 
 674 static void getCommand(redisClient 
*c
); 
 675 static void delCommand(redisClient 
*c
); 
 676 static void existsCommand(redisClient 
*c
); 
 677 static void incrCommand(redisClient 
*c
); 
 678 static void decrCommand(redisClient 
*c
); 
 679 static void incrbyCommand(redisClient 
*c
); 
 680 static void decrbyCommand(redisClient 
*c
); 
 681 static void selectCommand(redisClient 
*c
); 
 682 static void randomkeyCommand(redisClient 
*c
); 
 683 static void keysCommand(redisClient 
*c
); 
 684 static void dbsizeCommand(redisClient 
*c
); 
 685 static void lastsaveCommand(redisClient 
*c
); 
 686 static void saveCommand(redisClient 
*c
); 
 687 static void bgsaveCommand(redisClient 
*c
); 
 688 static void bgrewriteaofCommand(redisClient 
*c
); 
 689 static void shutdownCommand(redisClient 
*c
); 
 690 static void moveCommand(redisClient 
*c
); 
 691 static void renameCommand(redisClient 
*c
); 
 692 static void renamenxCommand(redisClient 
*c
); 
 693 static void lpushCommand(redisClient 
*c
); 
 694 static void rpushCommand(redisClient 
*c
); 
 695 static void lpopCommand(redisClient 
*c
); 
 696 static void rpopCommand(redisClient 
*c
); 
 697 static void llenCommand(redisClient 
*c
); 
 698 static void lindexCommand(redisClient 
*c
); 
 699 static void lrangeCommand(redisClient 
*c
); 
 700 static void ltrimCommand(redisClient 
*c
); 
 701 static void typeCommand(redisClient 
*c
); 
 702 static void lsetCommand(redisClient 
*c
); 
 703 static void saddCommand(redisClient 
*c
); 
 704 static void sremCommand(redisClient 
*c
); 
 705 static void smoveCommand(redisClient 
*c
); 
 706 static void sismemberCommand(redisClient 
*c
); 
 707 static void scardCommand(redisClient 
*c
); 
 708 static void spopCommand(redisClient 
*c
); 
 709 static void srandmemberCommand(redisClient 
*c
); 
 710 static void sinterCommand(redisClient 
*c
); 
 711 static void sinterstoreCommand(redisClient 
*c
); 
 712 static void sunionCommand(redisClient 
*c
); 
 713 static void sunionstoreCommand(redisClient 
*c
); 
 714 static void sdiffCommand(redisClient 
*c
); 
 715 static void sdiffstoreCommand(redisClient 
*c
); 
 716 static void syncCommand(redisClient 
*c
); 
 717 static void flushdbCommand(redisClient 
*c
); 
 718 static void flushallCommand(redisClient 
*c
); 
 719 static void sortCommand(redisClient 
*c
); 
 720 static void lremCommand(redisClient 
*c
); 
 721 static void rpoplpushcommand(redisClient 
*c
); 
 722 static void infoCommand(redisClient 
*c
); 
 723 static void mgetCommand(redisClient 
*c
); 
 724 static void monitorCommand(redisClient 
*c
); 
 725 static void expireCommand(redisClient 
*c
); 
 726 static void expireatCommand(redisClient 
*c
); 
 727 static void getsetCommand(redisClient 
*c
); 
 728 static void ttlCommand(redisClient 
*c
); 
 729 static void slaveofCommand(redisClient 
*c
); 
 730 static void debugCommand(redisClient 
*c
); 
 731 static void msetCommand(redisClient 
*c
); 
 732 static void msetnxCommand(redisClient 
*c
); 
 733 static void zaddCommand(redisClient 
*c
); 
 734 static void zincrbyCommand(redisClient 
*c
); 
 735 static void zrangeCommand(redisClient 
*c
); 
 736 static void zrangebyscoreCommand(redisClient 
*c
); 
 737 static void zcountCommand(redisClient 
*c
); 
 738 static void zrevrangeCommand(redisClient 
*c
); 
 739 static void zcardCommand(redisClient 
*c
); 
 740 static void zremCommand(redisClient 
*c
); 
 741 static void zscoreCommand(redisClient 
*c
); 
 742 static void zremrangebyscoreCommand(redisClient 
*c
); 
 743 static void multiCommand(redisClient 
*c
); 
 744 static void execCommand(redisClient 
*c
); 
 745 static void discardCommand(redisClient 
*c
); 
 746 static void blpopCommand(redisClient 
*c
); 
 747 static void brpopCommand(redisClient 
*c
); 
 748 static void appendCommand(redisClient 
*c
); 
 749 static void substrCommand(redisClient 
*c
); 
 750 static void zrankCommand(redisClient 
*c
); 
 751 static void zrevrankCommand(redisClient 
*c
); 
 752 static void hsetCommand(redisClient 
*c
); 
 753 static void hsetnxCommand(redisClient 
*c
); 
 754 static void hgetCommand(redisClient 
*c
); 
 755 static void hmsetCommand(redisClient 
*c
); 
 756 static void hmgetCommand(redisClient 
*c
); 
 757 static void hdelCommand(redisClient 
*c
); 
 758 static void hlenCommand(redisClient 
*c
); 
 759 static void zremrangebyrankCommand(redisClient 
*c
); 
 760 static void zunionstoreCommand(redisClient 
*c
); 
 761 static void zinterstoreCommand(redisClient 
*c
); 
 762 static void hkeysCommand(redisClient 
*c
); 
 763 static void hvalsCommand(redisClient 
*c
); 
 764 static void hgetallCommand(redisClient 
*c
); 
 765 static void hexistsCommand(redisClient 
*c
); 
 766 static void configCommand(redisClient 
*c
); 
 767 static void hincrbyCommand(redisClient 
*c
); 
 768 static void subscribeCommand(redisClient 
*c
); 
 769 static void unsubscribeCommand(redisClient 
*c
); 
 770 static void psubscribeCommand(redisClient 
*c
); 
 771 static void punsubscribeCommand(redisClient 
*c
); 
 772 static void publishCommand(redisClient 
*c
); 
 773 static void watchCommand(redisClient 
*c
); 
 774 static void unwatchCommand(redisClient 
*c
); 
 776 /*================================= Globals ================================= */ 
 779 static struct redisServer server
; /* server global state */ 
 780 static struct redisCommand 
*commandTable
; 
 781 static struct redisCommand readonlyCommandTable
[] = { 
 782     {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 783     {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 784     {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 785     {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0}, 
 786     {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 787     {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 788     {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 789     {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 790     {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 791     {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 792     {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1}, 
 793     {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 794     {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 795     {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 796     {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 797     {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 798     {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 799     {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 800     {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 801     {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 802     {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 803     {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 804     {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 805     {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1}, 
 806     {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 807     {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 808     {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1}, 
 809     {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 810     {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 811     {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 812     {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 813     {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 814     {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 815     {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 816     {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 817     {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1}, 
 818     {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1}, 
 819     {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 820     {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 821     {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 822     {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 823     {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 824     {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 825     {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 826     {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0}, 
 827     {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 828     {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 829     {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 830     {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 831     {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 832     {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 833     {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 834     {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 835     {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 836     {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 837     {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 838     {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 839     {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 840     {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 841     {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 842     {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 843     {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 844     {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 845     {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 846     {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1}, 
 847     {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 848     {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 849     {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 850     {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 851     {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2}, 
 852     {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 853     {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 854     {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 855     {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 856     {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 857     {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 858     {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 859     {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 860     {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 861     {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 862     {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 863     {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 864     {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 865     {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 866     {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 867     {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 868     {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 869     {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 870     {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 871     {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0}, 
 872     {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 873     {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 874     {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 875     {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 876     {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1}, 
 877     {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 878     {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 879     {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1}, 
 880     {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 881     {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 882     {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0}, 
 883     {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 884     {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 885     {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 886     {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 887     {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0}, 
 888     {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0}, 
 889     {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0} 
 892 /*============================ Utility functions ============================ */ 
 894 /* Glob-style pattern matching. */ 
 895 static int stringmatchlen(const char *pattern
, int patternLen
, 
 896         const char *string
, int stringLen
, int nocase
) 
 901             while (pattern
[1] == '*') { 
 906                 return 1; /* match */ 
 908                 if (stringmatchlen(pattern
+1, patternLen
-1, 
 909                             string
, stringLen
, nocase
)) 
 910                     return 1; /* match */ 
 914             return 0; /* no match */ 
 918                 return 0; /* no match */ 
 928             not = pattern
[0] == '^'; 
 935                 if (pattern
[0] == '\\') { 
 938                     if (pattern
[0] == string
[0]) 
 940                 } else if (pattern
[0] == ']') { 
 942                 } else if (patternLen 
== 0) { 
 946                 } else if (pattern
[1] == '-' && patternLen 
>= 3) { 
 947                     int start 
= pattern
[0]; 
 948                     int end 
= pattern
[2]; 
 956                         start 
= tolower(start
); 
 962                     if (c 
>= start 
&& c 
<= end
) 
 966                         if (pattern
[0] == string
[0]) 
 969                         if (tolower((int)pattern
[0]) == tolower((int)string
[0])) 
 979                 return 0; /* no match */ 
 985             if (patternLen 
>= 2) { 
 992                 if (pattern
[0] != string
[0]) 
 993                     return 0; /* no match */ 
 995                 if (tolower((int)pattern
[0]) != tolower((int)string
[0])) 
 996                     return 0; /* no match */ 
1004         if (stringLen 
== 0) { 
1005             while(*pattern 
== '*') { 
1012     if (patternLen 
== 0 && stringLen 
== 0) 
/* Glob-style match of two NUL terminated strings: measures both arguments
 * with strlen() and hands them to stringmatchlen(). Returns whatever
 * stringmatchlen() returns. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int plen = strlen(pattern);
    int slen = strlen(string);

    return stringmatchlen(pattern,plen,string,slen,nocase);
}
/* Convert a string representing an amount of memory into the number of
 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
 * (1024*1024*1024).
 *
 * The string is an optional '-' sign and a run of decimal digits followed
 * by an optional, case insensitive unit:
 *
 *   (none), b -> 1
 *   k  -> 1000        kb -> 1024
 *   m  -> 1000*1000   mb -> 1024*1024
 *   g  -> 1000^3      gb -> 1024^3
 *
 * If 'err' is not NULL it is set to 0 on success and to 1 on error.
 * An unknown unit sets the error and the digits are returned unscaled.
 * A digit run too long for the internal buffer, or a result that does not
 * fit in a long long, sets the error and LLONG_MAX is returned. */
static long long memtoll(const char *p, int *err) {
    const char *u;
    char buf[128];
    long mul; /* unit multiplier */
    long long val;
    unsigned int digits;

    if (err) *err = 0;
    /* Search the first non digit character. */
    u = p;
    if (*u == '-') u++;
    /* isdigit() is only defined for unsigned char values and EOF. */
    while(*u && isdigit((unsigned char)*u)) u++;
    if (*u == '\0' || !strcasecmp(u,"b")) {
        mul = 1;
    } else if (!strcasecmp(u,"k")) {
        mul = 1000;
    } else if (!strcasecmp(u,"kb")) {
        mul = 1024;
    } else if (!strcasecmp(u,"m")) {
        mul = 1000*1000;
    } else if (!strcasecmp(u,"mb")) {
        mul = 1024*1024;
    } else if (!strcasecmp(u,"g")) {
        mul = 1000L*1000*1000;
    } else if (!strcasecmp(u,"gb")) {
        mul = 1024L*1024*1024;
    } else {
        if (err) *err = 1;
        mul = 1;
    }
    digits = u-p;
    if (digits >= sizeof(buf)) {
        if (err) *err = 1;
        return LLONG_MAX;
    }
    memcpy(buf,p,digits);
    buf[digits] = '\0';
    val = strtoll(buf,NULL,10);
    /* mul is always positive here: refuse products that would overflow
     * instead of invoking undefined behavior. */
    if (val > LLONG_MAX/mul || val < LLONG_MIN/mul) {
        if (err) *err = 1;
        return LLONG_MAX;
    }
    return val*mul;
}
/* Convert a long long into a string.
 *
 * Writes the decimal representation of 'value' into 's', which is 'len'
 * bytes long, always NUL terminating it when len > 0. If the buffer is too
 * small the representation is truncated to its first len-1 characters.
 *
 * Returns the number of characters stored, not counting the NUL term
 * (0 when len is 0, in which case nothing is written). */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Negate in unsigned arithmetic: -value overflows for LLONG_MIN. */
    v = (value < 0) ? -(unsigned long long)value : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    do {
        *p-- = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++;
    l = 32-(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return l;
}
1092 static void redisLog(int level
, const char *fmt
, ...) { 
1096     fp 
= (server
.logfile 
== NULL
) ? stdout 
: fopen(server
.logfile
,"a"); 
1100     if (level 
>= server
.verbosity
) { 
1106         strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
)); 
1107         fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]); 
1108         vfprintf(fp
, fmt
, ap
); 
1114     if (server
.logfile
) fclose(fp
); 
1117 /*====================== Hash table type implementation  ==================== */ 
1119 /* This is a hash table type that uses the SDS dynamic strings library as 
1120  * keys and redis objects as values (objects can hold SDS strings, 
/* Dict destructor callback: releases 'val' with zfree().
 * The private data pointer is ignored. */
static void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    zfree(val);
}
1129 static void dictListDestructor(void *privdata
, void *val
) 
1131     DICT_NOTUSED(privdata
); 
1132     listRelease((list
*)val
); 
1135 static int dictSdsKeyCompare(void *privdata
, const void *key1
, 
1139     DICT_NOTUSED(privdata
); 
1141     l1 
= sdslen((sds
)key1
); 
1142     l2 
= sdslen((sds
)key2
); 
1143     if (l1 
!= l2
) return 0; 
1144     return memcmp(key1
, key2
, l1
) == 0; 
/* Dict destructor callback for redis objects: drops one reference with
 * decrRefCount(). NULL is accepted and ignored, since the values of swapped
 * out keys are set to NULL. The private data pointer is ignored. */
static void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    if (val != NULL) decrRefCount(val);
}
/* Dict destructor callback for sds strings: releases 'val' with sdsfree().
 * The private data pointer is ignored. */
static void dictSdsDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    sdsfree(val);
}
1162 static int dictObjKeyCompare(void *privdata
, const void *key1
, 
1165     const robj 
*o1 
= key1
, *o2 
= key2
; 
1166     return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1169 static unsigned int dictObjHash(const void *key
) { 
1170     const robj 
*o 
= key
; 
1171     return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
/* Dict hash callback for keys that are sds strings: hashes all the bytes
 * of the string. */
static unsigned int dictSdsHash(const void *key) {
    int len = sdslen((char*)key);

    return dictGenHashFunction((unsigned char*)key, len);
}
1178 static int dictEncObjKeyCompare(void *privdata
, const void *key1
, 
1181     robj 
*o1 
= (robj
*) key1
, *o2 
= (robj
*) key2
; 
1184     if (o1
->encoding 
== REDIS_ENCODING_INT 
&& 
1185         o2
->encoding 
== REDIS_ENCODING_INT
) 
1186             return o1
->ptr 
== o2
->ptr
; 
1188     o1 
= getDecodedObject(o1
); 
1189     o2 
= getDecodedObject(o2
); 
1190     cmp 
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
); 
1196 static unsigned int dictEncObjHash(const void *key
) { 
1197     robj 
*o 
= (robj
*) key
; 
1199     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
1200         return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1202         if (o
->encoding 
== REDIS_ENCODING_INT
) { 
1206             len 
= ll2string(buf
,32,(long)o
->ptr
); 
1207             return dictGenHashFunction((unsigned char*)buf
, len
); 
1211             o 
= getDecodedObject(o
); 
1212             hash 
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
)); 
1220 static dictType setDictType 
= { 
1221     dictEncObjHash
,            /* hash function */ 
1224     dictEncObjKeyCompare
,      /* key compare */ 
1225     dictRedisObjectDestructor
, /* key destructor */ 
1226     NULL                       
/* val destructor */ 
1229 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ 
1230 static dictType zsetDictType 
= { 
1231     dictEncObjHash
,            /* hash function */ 
1234     dictEncObjKeyCompare
,      /* key compare */ 
1235     dictRedisObjectDestructor
, /* key destructor */ 
1236     dictVanillaFree            
/* val destructor of malloc(sizeof(double)) */ 
1239 /* Db->dict, keys are sds strings, vals are Redis objects. */ 
1240 static dictType dbDictType 
= { 
1241     dictSdsHash
,                /* hash function */ 
1244     dictSdsKeyCompare
,          /* key compare */ 
1245     dictSdsDestructor
,          /* key destructor */ 
1246     dictRedisObjectDestructor   
/* val destructor */ 
1250 static dictType keyptrDictType 
= { 
1251     dictSdsHash
,               /* hash function */ 
1254     dictSdsKeyCompare
,         /* key compare */ 
1255     dictSdsDestructor
,         /* key destructor */ 
1256     NULL                       
/* val destructor */ 
1259 /* Hash type hash table (note that small hashes are represented with zimpaps) */ 
1260 static dictType hashDictType 
= { 
1261     dictEncObjHash
,             /* hash function */ 
1264     dictEncObjKeyCompare
,       /* key compare */ 
1265     dictRedisObjectDestructor
,  /* key destructor */ 
1266     dictRedisObjectDestructor   
/* val destructor */ 
1269 /* Keylist hash table type has unencoded redis objects as keys and 
1270  * lists as values. It's used for blocking operations (BLPOP) and to 
1271  * map swapped keys to a list of clients waiting for this keys to be loaded. */ 
1272 static dictType keylistDictType 
= { 
1273     dictObjHash
,                /* hash function */ 
1276     dictObjKeyCompare
,          /* key compare */ 
1277     dictRedisObjectDestructor
,  /* key destructor */ 
1278     dictListDestructor          
/* val destructor */ 
1281 static void version(); 
1283 /* ========================= Random utility functions ======================= */ 
1285 /* Redis generally does not try to recover from out of memory conditions 
1286  * when allocating objects or strings, it is not clear if it will be possible 
1287  * to report this condition to the client since the networking layer itself 
1288  * is based on heap allocation for send buffers, so we simply abort. 
1289  * At least the code will be simpler to read... */ 
1290 static void oom(const char *msg
) { 
1291     redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
); 
1296 /* ====================== Redis server networking stuff ===================== */ 
1297 static void closeTimedoutClients(void) { 
1300     time_t now 
= time(NULL
); 
1303     listRewind(server
.clients
,&li
); 
1304     while ((ln 
= listNext(&li
)) != NULL
) { 
1305         c 
= listNodeValue(ln
); 
1306         if (server
.maxidletime 
&& 
1307             !(c
->flags 
& REDIS_SLAVE
) &&    /* no timeout for slaves */ 
1308             !(c
->flags 
& REDIS_MASTER
) &&   /* no timeout for masters */ 
1309             dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */ 
1310             listLength(c
->pubsub_patterns
) == 0 && 
1311             (now 
- c
->lastinteraction 
> server
.maxidletime
)) 
1313             redisLog(REDIS_VERBOSE
,"Closing idle client"); 
1315         } else if (c
->flags 
& REDIS_BLOCKED
) { 
1316             if (c
->blockingto 
!= 0 && c
->blockingto 
< now
) { 
1317                 addReply(c
,shared
.nullmultibulk
); 
1318                 unblockClientWaitingData(c
); 
1324 static int htNeedsResize(dict 
*dict
) { 
1325     long long size
, used
; 
1327     size 
= dictSlots(dict
); 
1328     used 
= dictSize(dict
); 
1329     return (size 
&& used 
&& size 
> DICT_HT_INITIAL_SIZE 
&& 
1330             (used
*100/size 
< REDIS_HT_MINFILL
)); 
1333 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL 
1334  * we resize the hash table to save memory */ 
1335 static void tryResizeHashTables(void) { 
1338     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1339         if (htNeedsResize(server
.db
[j
].dict
)) 
1340             dictResize(server
.db
[j
].dict
); 
1341         if (htNeedsResize(server
.db
[j
].expires
)) 
1342             dictResize(server
.db
[j
].expires
); 
1346 /* Our hash table implementation performs rehashing incrementally while 
1347  * we write/read from the hash table. Still if the server is idle, the hash 
1348  * table will use two tables for a long time. So we try to use 1 millisecond 
1349  * of CPU time at every serverCron() loop in order to rehash some key. */ 
1350 static void incrementallyRehash(void) { 
1353     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1354         if (dictIsRehashing(server
.db
[j
].dict
)) { 
1355             dictRehashMilliseconds(server
.db
[j
].dict
,1); 
1356             break; /* already used our millisecond for this loop... */ 
1361 /* A background saving child (BGSAVE) terminated its work. Handle this. */ 
1362 void backgroundSaveDoneHandler(int statloc
) { 
1363     int exitcode 
= WEXITSTATUS(statloc
); 
1364     int bysignal 
= WIFSIGNALED(statloc
); 
1366     if (!bysignal 
&& exitcode 
== 0) { 
1367         redisLog(REDIS_NOTICE
, 
1368             "Background saving terminated with success"); 
1370         server
.lastsave 
= time(NULL
); 
1371     } else if (!bysignal 
&& exitcode 
!= 0) { 
1372         redisLog(REDIS_WARNING
, "Background saving error"); 
1374         redisLog(REDIS_WARNING
, 
1375             "Background saving terminated by signal %d", WTERMSIG(statloc
)); 
1376         rdbRemoveTempFile(server
.bgsavechildpid
); 
1378     server
.bgsavechildpid 
= -1; 
1379     /* Possibly there are slaves waiting for a BGSAVE in order to be served 
1380      * (the first stage of SYNC is a bulk transfer of dump.rdb) */ 
1381     updateSlavesWaitingBgsave(exitcode 
== 0 ? REDIS_OK 
: REDIS_ERR
); 
1384 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. 
1386 void backgroundRewriteDoneHandler(int statloc
) { 
1387     int exitcode 
= WEXITSTATUS(statloc
); 
1388     int bysignal 
= WIFSIGNALED(statloc
); 
1390     if (!bysignal 
&& exitcode 
== 0) { 
1394         redisLog(REDIS_NOTICE
, 
1395             "Background append only file rewriting terminated with success"); 
1396         /* Now it's time to flush the differences accumulated by the parent */ 
1397         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
); 
1398         fd 
= open(tmpfile
,O_WRONLY
|O_APPEND
); 
1400             redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
)); 
1403         /* Flush our data... */ 
1404         if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) != 
1405                 (signed) sdslen(server
.bgrewritebuf
)) { 
1406             redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
)); 
1410         redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
)); 
1411         /* Now our work is to rename the temp file into the stable file. And 
1412          * switch the file descriptor used by the server for append only. */ 
1413         if (rename(tmpfile
,server
.appendfilename
) == -1) { 
1414             redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
)); 
1418         /* Mission completed... almost */ 
1419         redisLog(REDIS_NOTICE
,"Append only file successfully rewritten."); 
1420         if (server
.appendfd 
!= -1) { 
1421             /* If append only is actually enabled... */ 
1422             close(server
.appendfd
); 
1423             server
.appendfd 
= fd
; 
1424             if (server
.appendfsync 
!= APPENDFSYNC_NO
) aof_fsync(fd
); 
1425             server
.appendseldb 
= -1; /* Make sure it will issue SELECT */ 
1426             redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends."); 
1428             /* If append only is disabled we just generate a dump in this 
1429              * format. Why not? */ 
1432     } else if (!bysignal 
&& exitcode 
!= 0) { 
1433         redisLog(REDIS_WARNING
, "Background append only file rewriting error"); 
1435         redisLog(REDIS_WARNING
, 
1436             "Background append only file rewriting terminated by signal %d", 
1440     sdsfree(server
.bgrewritebuf
); 
1441     server
.bgrewritebuf 
= sdsempty(); 
1442     aofRemoveTempFile(server
.bgrewritechildpid
); 
1443     server
.bgrewritechildpid 
= -1; 
1446 /* This function is called once a background process of some kind terminates, 
1447  * as we want to avoid resizing the hash tables when there is a child in order 
1448  * to play well with copy-on-write (otherwise when a resize happens lots of 
1449  * memory pages are copied). The goal of this function is to update the ability 
1450  * for dict.c to resize the hash tables accordingly to the fact we have o not 
1451  * running childs. */ 
1452 static void updateDictResizePolicy(void) { 
1453     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) 
1456         dictDisableResize(); 
1459 static int serverCron(struct aeEventLoop 
*eventLoop
, long long id
, void *clientData
) { 
1460     int j
, loops 
= server
.cronloops
++; 
1461     REDIS_NOTUSED(eventLoop
); 
1463     REDIS_NOTUSED(clientData
); 
1465     /* We take a cached value of the unix time in the global state because 
1466      * with virtual memory and aging there is to store the current time 
1467      * in objects at every object access, and accuracy is not needed. 
1468      * To access a global var is faster than calling time(NULL) */ 
1469     server
.unixtime 
= time(NULL
); 
1470     /* We have just 21 bits per object for LRU information. 
1471      * So we use an (eventually wrapping) LRU clock with minutes resolution. 
1473      * When we need to select what object to swap, we compute the minimum 
1474      * time distance between the current lruclock and the object last access 
1475      * lruclock info. Even if clocks will wrap on overflow, there is 
1476      * the interesting property that we are sure that at least 
1477      * ABS(A-B) minutes passed between current time and timestamp B. 
1479      * This is not precise but we don't need at all precision, but just 
1480      * something statistically reasonable. 
1482     server
.lruclock 
= (time(NULL
)/60)&((1<<21)-1); 
1484     /* We received a SIGTERM, shutting down here in a safe way, as it is 
1485      * not ok doing so inside the signal handler. */ 
1486     if (server
.shutdown_asap
) { 
1487         if (prepareForShutdown() == REDIS_OK
) exit(0); 
1488         redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information"); 
1491     /* Show some info about non-empty databases */ 
1492     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1493         long long size
, used
, vkeys
; 
1495         size 
= dictSlots(server
.db
[j
].dict
); 
1496         used 
= dictSize(server
.db
[j
].dict
); 
1497         vkeys 
= dictSize(server
.db
[j
].expires
); 
1498         if (!(loops 
% 50) && (used 
|| vkeys
)) { 
1499             redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
); 
1500             /* dictPrintStats(server.dict); */ 
1504     /* We don't want to resize the hash tables while a bacground saving 
1505      * is in progress: the saving child is created using fork() that is 
1506      * implemented with a copy-on-write semantic in most modern systems, so 
1507      * if we resize the HT while there is the saving child at work actually 
1508      * a lot of memory movements in the parent will cause a lot of pages 
1510     if (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1) { 
1511         if (!(loops 
% 10)) tryResizeHashTables(); 
1512         if (server
.activerehashing
) incrementallyRehash(); 
1515     /* Show information about connected clients */ 
1516     if (!(loops 
% 50)) { 
1517         redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use", 
1518             listLength(server
.clients
)-listLength(server
.slaves
), 
1519             listLength(server
.slaves
), 
1520             zmalloc_used_memory()); 
1523     /* Close connections of timedout clients */ 
1524     if ((server
.maxidletime 
&& !(loops 
% 100)) || server
.blpop_blocked_clients
) 
1525         closeTimedoutClients(); 
1527     /* Check if a background saving or AOF rewrite in progress terminated */ 
1528     if (server
.bgsavechildpid 
!= -1 || server
.bgrewritechildpid 
!= -1) { 
1532         if ((pid 
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) { 
1533             if (pid 
== server
.bgsavechildpid
) { 
1534                 backgroundSaveDoneHandler(statloc
); 
1536                 backgroundRewriteDoneHandler(statloc
); 
1538             updateDictResizePolicy(); 
1541         /* If there is not a background saving in progress check if 
1542          * we have to save now */ 
1543          time_t now 
= time(NULL
); 
1544          for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
1545             struct saveparam 
*sp 
= server
.saveparams
+j
; 
1547             if (server
.dirty 
>= sp
->changes 
&& 
1548                 now
-server
.lastsave 
> sp
->seconds
) { 
1549                 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...", 
1550                     sp
->changes
, sp
->seconds
); 
1551                 rdbSaveBackground(server
.dbfilename
); 
1557     /* Try to expire a few timed out keys. The algorithm used is adaptive and 
1558      * will use few CPU cycles if there are few expiring keys, otherwise 
1559      * it will get more aggressive to avoid that too much memory is used by 
1560      * keys that can be removed from the keyspace. */ 
1561     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1563         redisDb 
*db 
= server
.db
+j
; 
1565         /* Continue to expire if at the end of the cycle more than 25% 
1566          * of the keys were expired. */ 
1568             long num 
= dictSize(db
->expires
); 
1569             time_t now 
= time(NULL
); 
1572             if (num 
> REDIS_EXPIRELOOKUPS_PER_CRON
) 
1573                 num 
= REDIS_EXPIRELOOKUPS_PER_CRON
; 
1578                 if ((de 
= dictGetRandomKey(db
->expires
)) == NULL
) break; 
1579                 t 
= (time_t) dictGetEntryVal(de
); 
1581                     sds key 
= dictGetEntryKey(de
); 
1582                     robj 
*keyobj 
= createStringObject(key
,sdslen(key
)); 
1584                     dbDelete(db
,keyobj
); 
1585                     decrRefCount(keyobj
); 
1587                     server
.stat_expiredkeys
++; 
1590         } while (expired 
> REDIS_EXPIRELOOKUPS_PER_CRON
/4); 
1593     /* Swap a few keys on disk if we are over the memory limit and VM 
1594      * is enbled. Try to free objects from the free list first. */ 
1595     if (vmCanSwapOut()) { 
1596         while (server
.vm_enabled 
&& zmalloc_used_memory() > 
1597                 server
.vm_max_memory
) 
1601             if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
1602             retval 
= (server
.vm_max_threads 
== 0) ? 
1603                         vmSwapOneObjectBlocking() : 
1604                         vmSwapOneObjectThreaded(); 
1605             if (retval 
== REDIS_ERR 
&& !(loops 
% 300) && 
1606                 zmalloc_used_memory() > 
1607                 (server
.vm_max_memory
+server
.vm_max_memory
/10)) 
1609                 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!"); 
1611             /* Note that when using threaded I/O we free just one object, 
1612              * because anyway when the I/O thread in charge to swap this 
1613              * object out will finish, the handler of completed jobs 
1614              * will try to swap more objects if we are still out of memory. */ 
1615             if (retval 
== REDIS_ERR 
|| server
.vm_max_threads 
> 0) break; 
1619     /* Check if we should connect to a MASTER */ 
1620     if (server
.replstate 
== REDIS_REPL_CONNECT 
&& !(loops 
% 10)) { 
1621         redisLog(REDIS_NOTICE
,"Connecting to MASTER..."); 
1622         if (syncWithMaster() == REDIS_OK
) { 
1623             redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded"); 
1624             if (server
.appendonly
) rewriteAppendOnlyFileBackground(); 
1630 /* This function gets called every time Redis is entering the 
1631  * main loop of the event driven library, that is, before to sleep 
1632  * for ready file descriptors. */ 
1633 static void beforeSleep(struct aeEventLoop 
*eventLoop
) { 
1634     REDIS_NOTUSED(eventLoop
); 
1636     /* Awake clients that got all the swapped keys they requested */ 
1637     if (server
.vm_enabled 
&& listLength(server
.io_ready_clients
)) { 
1641         listRewind(server
.io_ready_clients
,&li
); 
1642         while((ln 
= listNext(&li
))) { 
1643             redisClient 
*c 
= ln
->value
; 
1644             struct redisCommand 
*cmd
; 
1646             /* Resume the client. */ 
1647             listDelNode(server
.io_ready_clients
,ln
); 
1648             c
->flags 
&= (~REDIS_IO_WAIT
); 
1649             server
.vm_blocked_clients
--; 
1650             aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
1651                 readQueryFromClient
, c
); 
1652             cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
1653             assert(cmd 
!= NULL
); 
1656             /* There may be more data to process in the input buffer. */ 
1657             if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) 
1658                 processInputBuffer(c
); 
1661     /* Write the AOF buffer on disk */ 
1662     flushAppendOnlyFile(); 
1665 static void createSharedObjects(void) { 
1668     shared
.crlf 
= createObject(REDIS_STRING
,sdsnew("\r\n")); 
1669     shared
.ok 
= createObject(REDIS_STRING
,sdsnew("+OK\r\n")); 
1670     shared
.err 
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n")); 
1671     shared
.emptybulk 
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n")); 
1672     shared
.czero 
= createObject(REDIS_STRING
,sdsnew(":0\r\n")); 
1673     shared
.cone 
= createObject(REDIS_STRING
,sdsnew(":1\r\n")); 
1674     shared
.nullbulk 
= createObject(REDIS_STRING
,sdsnew("$-1\r\n")); 
1675     shared
.nullmultibulk 
= createObject(REDIS_STRING
,sdsnew("*-1\r\n")); 
1676     shared
.emptymultibulk 
= createObject(REDIS_STRING
,sdsnew("*0\r\n")); 
1677     shared
.pong 
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n")); 
1678     shared
.queued 
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n")); 
1679     shared
.wrongtypeerr 
= createObject(REDIS_STRING
,sdsnew( 
1680         "-ERR Operation against a key holding the wrong kind of value\r\n")); 
1681     shared
.nokeyerr 
= createObject(REDIS_STRING
,sdsnew( 
1682         "-ERR no such key\r\n")); 
1683     shared
.syntaxerr 
= createObject(REDIS_STRING
,sdsnew( 
1684         "-ERR syntax error\r\n")); 
1685     shared
.sameobjecterr 
= createObject(REDIS_STRING
,sdsnew( 
1686         "-ERR source and destination objects are the same\r\n")); 
1687     shared
.outofrangeerr 
= createObject(REDIS_STRING
,sdsnew( 
1688         "-ERR index out of range\r\n")); 
1689     shared
.space 
= createObject(REDIS_STRING
,sdsnew(" ")); 
1690     shared
.colon 
= createObject(REDIS_STRING
,sdsnew(":")); 
1691     shared
.plus 
= createObject(REDIS_STRING
,sdsnew("+")); 
1692     shared
.select0 
= createStringObject("select 0\r\n",10); 
1693     shared
.select1 
= createStringObject("select 1\r\n",10); 
1694     shared
.select2 
= createStringObject("select 2\r\n",10); 
1695     shared
.select3 
= createStringObject("select 3\r\n",10); 
1696     shared
.select4 
= createStringObject("select 4\r\n",10); 
1697     shared
.select5 
= createStringObject("select 5\r\n",10); 
1698     shared
.select6 
= createStringObject("select 6\r\n",10); 
1699     shared
.select7 
= createStringObject("select 7\r\n",10); 
1700     shared
.select8 
= createStringObject("select 8\r\n",10); 
1701     shared
.select9 
= createStringObject("select 9\r\n",10); 
1702     shared
.messagebulk 
= createStringObject("$7\r\nmessage\r\n",13); 
1703     shared
.pmessagebulk 
= createStringObject("$8\r\npmessage\r\n",14); 
1704     shared
.subscribebulk 
= createStringObject("$9\r\nsubscribe\r\n",15); 
1705     shared
.unsubscribebulk 
= createStringObject("$11\r\nunsubscribe\r\n",18); 
1706     shared
.psubscribebulk 
= createStringObject("$10\r\npsubscribe\r\n",17); 
1707     shared
.punsubscribebulk 
= createStringObject("$12\r\npunsubscribe\r\n",19); 
1708     shared
.mbulk3 
= createStringObject("*3\r\n",4); 
1709     shared
.mbulk4 
= createStringObject("*4\r\n",4); 
1710     for (j 
= 0; j 
< REDIS_SHARED_INTEGERS
; j
++) { 
1711         shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
); 
1712         shared
.integers
[j
]->encoding 
= REDIS_ENCODING_INT
; 
1716 static void appendServerSaveParams(time_t seconds
, int changes
) { 
1717     server
.saveparams 
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1)); 
1718     server
.saveparams
[server
.saveparamslen
].seconds 
= seconds
; 
1719     server
.saveparams
[server
.saveparamslen
].changes 
= changes
; 
1720     server
.saveparamslen
++; 
1723 static void resetServerSaveParams() { 
1724     zfree(server
.saveparams
); 
1725     server
.saveparams 
= NULL
; 
1726     server
.saveparamslen 
= 0; 
1729 static void initServerConfig() { 
1730     server
.dbnum 
= REDIS_DEFAULT_DBNUM
; 
1731     server
.port 
= REDIS_SERVERPORT
; 
1732     server
.verbosity 
= REDIS_VERBOSE
; 
1733     server
.maxidletime 
= REDIS_MAXIDLETIME
; 
1734     server
.saveparams 
= NULL
; 
1735     server
.logfile 
= NULL
; /* NULL = log on standard output */ 
1736     server
.bindaddr 
= NULL
; 
1737     server
.glueoutputbuf 
= 1; 
1738     server
.daemonize 
= 0; 
1739     server
.appendonly 
= 0; 
1740     server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
1741     server
.no_appendfsync_on_rewrite 
= 0; 
1742     server
.lastfsync 
= time(NULL
); 
1743     server
.appendfd 
= -1; 
1744     server
.appendseldb 
= -1; /* Make sure the first time will not match */ 
1745     server
.pidfile 
= zstrdup("/var/run/redis.pid"); 
1746     server
.dbfilename 
= zstrdup("dump.rdb"); 
1747     server
.appendfilename 
= zstrdup("appendonly.aof"); 
1748     server
.requirepass 
= NULL
; 
1749     server
.rdbcompression 
= 1; 
1750     server
.activerehashing 
= 1; 
1751     server
.maxclients 
= 0; 
1752     server
.blpop_blocked_clients 
= 0; 
1753     server
.maxmemory 
= 0; 
1754     server
.vm_enabled 
= 0; 
1755     server
.vm_swap_file 
= zstrdup("/tmp/redis-%p.vm"); 
1756     server
.vm_page_size 
= 256;          /* 256 bytes per page */ 
1757     server
.vm_pages 
= 1024*1024*100;    /* 104 millions of pages */ 
1758     server
.vm_max_memory 
= 1024LL*1024*1024*1; /* 1 GB of RAM */ 
1759     server
.vm_max_threads 
= 4; 
1760     server
.vm_blocked_clients 
= 0; 
1761     server
.hash_max_zipmap_entries 
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
; 
1762     server
.hash_max_zipmap_value 
= REDIS_HASH_MAX_ZIPMAP_VALUE
; 
1763     server
.list_max_ziplist_entries 
= REDIS_LIST_MAX_ZIPLIST_ENTRIES
; 
1764     server
.list_max_ziplist_value 
= REDIS_LIST_MAX_ZIPLIST_VALUE
; 
1765     server
.shutdown_asap 
= 0; 
1767     resetServerSaveParams(); 
1769     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */ 
1770     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */ 
1771     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ 
1772     /* Replication related */ 
1774     server
.masterauth 
= NULL
; 
1775     server
.masterhost 
= NULL
; 
1776     server
.masterport 
= 6379; 
1777     server
.master 
= NULL
; 
1778     server
.replstate 
= REDIS_REPL_NONE
; 
1780     /* Double constants initialization */ 
1782     R_PosInf 
= 1.0/R_Zero
; 
1783     R_NegInf 
= -1.0/R_Zero
; 
1784     R_Nan 
= R_Zero
/R_Zero
; 
1787 static void initServer() { 
1790     signal(SIGHUP
, SIG_IGN
); 
1791     signal(SIGPIPE
, SIG_IGN
); 
1792     setupSigSegvAction(); 
1794     server
.devnull 
= fopen("/dev/null","w"); 
1795     if (server
.devnull 
== NULL
) { 
1796         redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
); 
1799     server
.clients 
= listCreate(); 
1800     server
.slaves 
= listCreate(); 
1801     server
.monitors 
= listCreate(); 
1802     server
.objfreelist 
= listCreate(); 
1803     createSharedObjects(); 
1804     server
.el 
= aeCreateEventLoop(); 
1805     server
.db 
= zmalloc(sizeof(redisDb
)*server
.dbnum
); 
1806     server
.fd 
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
); 
1807     if (server
.fd 
== -1) { 
1808         redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
); 
1811     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1812         server
.db
[j
].dict 
= dictCreate(&dbDictType
,NULL
); 
1813         server
.db
[j
].expires 
= dictCreate(&keyptrDictType
,NULL
); 
1814         server
.db
[j
].blocking_keys 
= dictCreate(&keylistDictType
,NULL
); 
1815         server
.db
[j
].watched_keys 
= dictCreate(&keylistDictType
,NULL
); 
1816         if (server
.vm_enabled
) 
1817             server
.db
[j
].io_keys 
= dictCreate(&keylistDictType
,NULL
); 
1818         server
.db
[j
].id 
= j
; 
1820     server
.pubsub_channels 
= dictCreate(&keylistDictType
,NULL
); 
1821     server
.pubsub_patterns 
= listCreate(); 
1822     listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
); 
1823     listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
); 
1824     server
.cronloops 
= 0; 
1825     server
.bgsavechildpid 
= -1; 
1826     server
.bgrewritechildpid 
= -1; 
1827     server
.bgrewritebuf 
= sdsempty(); 
1828     server
.aofbuf 
= sdsempty(); 
1829     server
.lastsave 
= time(NULL
); 
1831     server
.stat_numcommands 
= 0; 
1832     server
.stat_numconnections 
= 0; 
1833     server
.stat_expiredkeys 
= 0; 
1834     server
.stat_starttime 
= time(NULL
); 
1835     server
.unixtime 
= time(NULL
); 
1836     aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
); 
1837     if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
, 
1838         acceptHandler
, NULL
) == AE_ERR
) oom("creating file event"); 
1840     if (server
.appendonly
) { 
1841         server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
1842         if (server
.appendfd 
== -1) { 
1843             redisLog(REDIS_WARNING
, "Can't open the append-only file: %s", 
1849     if (server
.vm_enabled
) vmInit(); 
1852 /* Empty the whole database */ 
1853 static long long emptyDb() { 
1855     long long removed 
= 0; 
1857     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
1858         removed 
+= dictSize(server
.db
[j
].dict
); 
1859         dictEmpty(server
.db
[j
].dict
); 
1860         dictEmpty(server
.db
[j
].expires
); 
/* Convert a "yes"/"no" string (matched case-insensitively) into 1/0.
 * Any other string yields -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1871 /* I agree, this is a very rudimental way to load a configuration... 
1872    will improve later if the config gets more complex */ 
1873 static void loadServerConfig(char *filename
) { 
1875     char buf
[REDIS_CONFIGLINE_MAX
+1], *err 
= NULL
; 
1879     if (filename
[0] == '-' && filename
[1] == '\0') 
1882         if ((fp 
= fopen(filename
,"r")) == NULL
) { 
1883             redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
); 
1888     while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) { 
1894         line 
= sdstrim(line
," \t\r\n"); 
1896         /* Skip comments and blank lines*/ 
1897         if (line
[0] == '#' || line
[0] == '\0') { 
1902         /* Split into arguments */ 
1903         argv 
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
); 
1904         sdstolower(argv
[0]); 
1906         /* Execute config directives */ 
1907         if (!strcasecmp(argv
[0],"timeout") && argc 
== 2) { 
1908             server
.maxidletime 
= atoi(argv
[1]); 
1909             if (server
.maxidletime 
< 0) { 
1910                 err 
= "Invalid timeout value"; goto loaderr
; 
1912         } else if (!strcasecmp(argv
[0],"port") && argc 
== 2) { 
1913             server
.port 
= atoi(argv
[1]); 
1914             if (server
.port 
< 1 || server
.port 
> 65535) { 
1915                 err 
= "Invalid port"; goto loaderr
; 
1917         } else if (!strcasecmp(argv
[0],"bind") && argc 
== 2) { 
1918             server
.bindaddr 
= zstrdup(argv
[1]); 
1919         } else if (!strcasecmp(argv
[0],"save") && argc 
== 3) { 
1920             int seconds 
= atoi(argv
[1]); 
1921             int changes 
= atoi(argv
[2]); 
1922             if (seconds 
< 1 || changes 
< 0) { 
1923                 err 
= "Invalid save parameters"; goto loaderr
; 
1925             appendServerSaveParams(seconds
,changes
); 
1926         } else if (!strcasecmp(argv
[0],"dir") && argc 
== 2) { 
1927             if (chdir(argv
[1]) == -1) { 
1928                 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s", 
1929                     argv
[1], strerror(errno
)); 
1932         } else if (!strcasecmp(argv
[0],"loglevel") && argc 
== 2) { 
1933             if (!strcasecmp(argv
[1],"debug")) server
.verbosity 
= REDIS_DEBUG
; 
1934             else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity 
= REDIS_VERBOSE
; 
1935             else if (!strcasecmp(argv
[1],"notice")) server
.verbosity 
= REDIS_NOTICE
; 
1936             else if (!strcasecmp(argv
[1],"warning")) server
.verbosity 
= REDIS_WARNING
; 
1938                 err 
= "Invalid log level. Must be one of debug, notice, warning"; 
1941         } else if (!strcasecmp(argv
[0],"logfile") && argc 
== 2) { 
1944             server
.logfile 
= zstrdup(argv
[1]); 
1945             if (!strcasecmp(server
.logfile
,"stdout")) { 
1946                 zfree(server
.logfile
); 
1947                 server
.logfile 
= NULL
; 
1949             if (server
.logfile
) { 
1950                 /* Test if we are able to open the file. The server will not 
1951                  * be able to abort just for this problem later... */ 
1952                 logfp 
= fopen(server
.logfile
,"a"); 
1953                 if (logfp 
== NULL
) { 
1954                     err 
= sdscatprintf(sdsempty(), 
1955                         "Can't open the log file: %s", strerror(errno
)); 
1960         } else if (!strcasecmp(argv
[0],"databases") && argc 
== 2) { 
1961             server
.dbnum 
= atoi(argv
[1]); 
1962             if (server
.dbnum 
< 1) { 
1963                 err 
= "Invalid number of databases"; goto loaderr
; 
1965         } else if (!strcasecmp(argv
[0],"include") && argc 
== 2) { 
1966             loadServerConfig(argv
[1]); 
1967         } else if (!strcasecmp(argv
[0],"maxclients") && argc 
== 2) { 
1968             server
.maxclients 
= atoi(argv
[1]); 
1969         } else if (!strcasecmp(argv
[0],"maxmemory") && argc 
== 2) { 
1970             server
.maxmemory 
= memtoll(argv
[1],NULL
); 
1971         } else if (!strcasecmp(argv
[0],"slaveof") && argc 
== 3) { 
1972             server
.masterhost 
= sdsnew(argv
[1]); 
1973             server
.masterport 
= atoi(argv
[2]); 
1974             server
.replstate 
= REDIS_REPL_CONNECT
; 
1975         } else if (!strcasecmp(argv
[0],"masterauth") && argc 
== 2) { 
1976                 server
.masterauth 
= zstrdup(argv
[1]); 
1977         } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc 
== 2) { 
1978             if ((server
.glueoutputbuf 
= yesnotoi(argv
[1])) == -1) { 
1979                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1981         } else if (!strcasecmp(argv
[0],"rdbcompression") && argc 
== 2) { 
1982             if ((server
.rdbcompression 
= yesnotoi(argv
[1])) == -1) { 
1983                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1985         } else if (!strcasecmp(argv
[0],"activerehashing") && argc 
== 2) { 
1986             if ((server
.activerehashing 
= yesnotoi(argv
[1])) == -1) { 
1987                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1989         } else if (!strcasecmp(argv
[0],"daemonize") && argc 
== 2) { 
1990             if ((server
.daemonize 
= yesnotoi(argv
[1])) == -1) { 
1991                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1993         } else if (!strcasecmp(argv
[0],"appendonly") && argc 
== 2) { 
1994             if ((server
.appendonly 
= yesnotoi(argv
[1])) == -1) { 
1995                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
1997         } else if (!strcasecmp(argv
[0],"appendfilename") && argc 
== 2) { 
1998             zfree(server
.appendfilename
); 
1999             server
.appendfilename 
= zstrdup(argv
[1]); 
2000         } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite") 
2002             if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) { 
2003                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
2005         } else if (!strcasecmp(argv
[0],"appendfsync") && argc 
== 2) { 
2006             if (!strcasecmp(argv
[1],"no")) { 
2007                 server
.appendfsync 
= APPENDFSYNC_NO
; 
2008             } else if (!strcasecmp(argv
[1],"always")) { 
2009                 server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
2010             } else if (!strcasecmp(argv
[1],"everysec")) { 
2011                 server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
2013                 err 
= "argument must be 'no', 'always' or 'everysec'"; 
2016         } else if (!strcasecmp(argv
[0],"requirepass") && argc 
== 2) { 
2017             server
.requirepass 
= zstrdup(argv
[1]); 
2018         } else if (!strcasecmp(argv
[0],"pidfile") && argc 
== 2) { 
2019             zfree(server
.pidfile
); 
2020             server
.pidfile 
= zstrdup(argv
[1]); 
2021         } else if (!strcasecmp(argv
[0],"dbfilename") && argc 
== 2) { 
2022             zfree(server
.dbfilename
); 
2023             server
.dbfilename 
= zstrdup(argv
[1]); 
2024         } else if (!strcasecmp(argv
[0],"vm-enabled") && argc 
== 2) { 
2025             if ((server
.vm_enabled 
= yesnotoi(argv
[1])) == -1) { 
2026                 err 
= "argument must be 'yes' or 'no'"; goto loaderr
; 
2028         } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc 
== 2) { 
2029             zfree(server
.vm_swap_file
); 
2030             server
.vm_swap_file 
= zstrdup(argv
[1]); 
2031         } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc 
== 2) { 
2032             server
.vm_max_memory 
= memtoll(argv
[1],NULL
); 
2033         } else if (!strcasecmp(argv
[0],"vm-page-size") && argc 
== 2) { 
2034             server
.vm_page_size 
= memtoll(argv
[1], NULL
); 
2035         } else if (!strcasecmp(argv
[0],"vm-pages") && argc 
== 2) { 
2036             server
.vm_pages 
= memtoll(argv
[1], NULL
); 
2037         } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc 
== 2) { 
2038             server
.vm_max_threads 
= strtoll(argv
[1], NULL
, 10); 
2039         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc 
== 2){ 
2040             server
.hash_max_zipmap_entries 
= memtoll(argv
[1], NULL
); 
2041         } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc 
== 2){ 
2042             server
.hash_max_zipmap_value 
= memtoll(argv
[1], NULL
); 
2043         } else if (!strcasecmp(argv
[0],"list-max-ziplist-entries") && argc 
== 2){ 
2044             server
.list_max_ziplist_entries 
= memtoll(argv
[1], NULL
); 
2045         } else if (!strcasecmp(argv
[0],"list-max-ziplist-value") && argc 
== 2){ 
2046             server
.list_max_ziplist_value 
= memtoll(argv
[1], NULL
); 
2048             err 
= "Bad directive or wrong number of arguments"; goto loaderr
; 
2050         for (j 
= 0; j 
< argc
; j
++) 
2055     if (fp 
!= stdin
) fclose(fp
); 
2059     fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n"); 
2060     fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
); 
2061     fprintf(stderr
, ">>> '%s'\n", line
); 
2062     fprintf(stderr
, "%s\n", err
); 
2066 static void freeClientArgv(redisClient 
*c
) { 
2069     for (j 
= 0; j 
< c
->argc
; j
++) 
2070         decrRefCount(c
->argv
[j
]); 
2071     for (j 
= 0; j 
< c
->mbargc
; j
++) 
2072         decrRefCount(c
->mbargv
[j
]); 
2077 static void freeClient(redisClient 
*c
) { 
2080     /* Note that if the client we are freeing is blocked into a blocking 
2081      * call, we have to set querybuf to NULL *before* to call 
2082      * unblockClientWaitingData() to avoid processInputBuffer() will get 
2083      * called. Also it is important to remove the file events after 
2084      * this, because this call adds the READABLE event. */ 
2085     sdsfree(c
->querybuf
); 
2087     if (c
->flags 
& REDIS_BLOCKED
) 
2088         unblockClientWaitingData(c
); 
2090     /* UNWATCH all the keys */ 
2092     listRelease(c
->watched_keys
); 
2093     /* Unsubscribe from all the pubsub channels */ 
2094     pubsubUnsubscribeAllChannels(c
,0); 
2095     pubsubUnsubscribeAllPatterns(c
,0); 
2096     dictRelease(c
->pubsub_channels
); 
2097     listRelease(c
->pubsub_patterns
); 
2098     /* Obvious cleanup */ 
2099     aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
2100     aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2101     listRelease(c
->reply
); 
2104     /* Remove from the list of clients */ 
2105     ln 
= listSearchKey(server
.clients
,c
); 
2106     redisAssert(ln 
!= NULL
); 
2107     listDelNode(server
.clients
,ln
); 
2108     /* Remove from the list of clients that are now ready to be restarted 
2109      * after waiting for swapped keys */ 
2110     if (c
->flags 
& REDIS_IO_WAIT 
&& listLength(c
->io_keys
) == 0) { 
2111         ln 
= listSearchKey(server
.io_ready_clients
,c
); 
2113             listDelNode(server
.io_ready_clients
,ln
); 
2114             server
.vm_blocked_clients
--; 
2117     /* Remove from the list of clients waiting for swapped keys */ 
2118     while (server
.vm_enabled 
&& listLength(c
->io_keys
)) { 
2119         ln 
= listFirst(c
->io_keys
); 
2120         dontWaitForSwappedKey(c
,ln
->value
); 
2122     listRelease(c
->io_keys
); 
2123     /* Master/slave cleanup */ 
2124     if (c
->flags 
& REDIS_SLAVE
) { 
2125         if (c
->replstate 
== REDIS_REPL_SEND_BULK 
&& c
->repldbfd 
!= -1) 
2127         list 
*l 
= (c
->flags 
& REDIS_MONITOR
) ? server
.monitors 
: server
.slaves
; 
2128         ln 
= listSearchKey(l
,c
); 
2129         redisAssert(ln 
!= NULL
); 
2132     if (c
->flags 
& REDIS_MASTER
) { 
2133         server
.master 
= NULL
; 
2134         server
.replstate 
= REDIS_REPL_CONNECT
; 
2136     /* Release memory */ 
2139     freeClientMultiState(c
); 
2143 #define GLUEREPLY_UP_TO (1024) 
2144 static void glueReplyBuffersIfNeeded(redisClient 
*c
) { 
2146     char buf
[GLUEREPLY_UP_TO
]; 
2151     listRewind(c
->reply
,&li
); 
2152     while((ln 
= listNext(&li
))) { 
2156         objlen 
= sdslen(o
->ptr
); 
2157         if (copylen 
+ objlen 
<= GLUEREPLY_UP_TO
) { 
2158             memcpy(buf
+copylen
,o
->ptr
,objlen
); 
2160             listDelNode(c
->reply
,ln
); 
2162             if (copylen 
== 0) return; 
2166     /* Now the output buffer is empty, add the new single element */ 
2167     o 
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
)); 
2168     listAddNodeHead(c
->reply
,o
); 
2171 static void sendReplyToClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2172     redisClient 
*c 
= privdata
; 
2173     int nwritten 
= 0, totwritten 
= 0, objlen
; 
2176     REDIS_NOTUSED(mask
); 
2178     /* Use writev() if we have enough buffers to send */ 
2179     if (!server
.glueoutputbuf 
&& 
2180         listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD 
&& 
2181         !(c
->flags 
& REDIS_MASTER
)) 
2183         sendReplyToClientWritev(el
, fd
, privdata
, mask
); 
2187     while(listLength(c
->reply
)) { 
2188         if (server
.glueoutputbuf 
&& listLength(c
->reply
) > 1) 
2189             glueReplyBuffersIfNeeded(c
); 
2191         o 
= listNodeValue(listFirst(c
->reply
)); 
2192         objlen 
= sdslen(o
->ptr
); 
2195             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2199         if (c
->flags 
& REDIS_MASTER
) { 
2200             /* Don't reply to a master */ 
2201             nwritten 
= objlen 
- c
->sentlen
; 
2203             nwritten 
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen 
- c
->sentlen
); 
2204             if (nwritten 
<= 0) break; 
2206         c
->sentlen 
+= nwritten
; 
2207         totwritten 
+= nwritten
; 
2208         /* If we fully sent the object on head go to the next one */ 
2209         if (c
->sentlen 
== objlen
) { 
2210             listDelNode(c
->reply
,listFirst(c
->reply
)); 
2213         /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT 
2214          * bytes, in a single threaded server it's a good idea to serve 
2215          * other clients as well, even if a very large request comes from 
2216          * super fast link that is always able to accept data (in real world 
2217          * scenario think about 'KEYS *' against the loopback interfae) */ 
2218         if (totwritten 
> REDIS_MAX_WRITE_PER_EVENT
) break; 
2220     if (nwritten 
== -1) { 
2221         if (errno 
== EAGAIN
) { 
2224             redisLog(REDIS_VERBOSE
, 
2225                 "Error writing to client: %s", strerror(errno
)); 
2230     if (totwritten 
> 0) c
->lastinteraction 
= time(NULL
); 
2231     if (listLength(c
->reply
) == 0) { 
2233         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2237 static void sendReplyToClientWritev(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) 
2239     redisClient 
*c 
= privdata
; 
2240     int nwritten 
= 0, totwritten 
= 0, objlen
, willwrite
; 
2242     struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
]; 
2243     int offset
, ion 
= 0; 
2245     REDIS_NOTUSED(mask
); 
2248     while (listLength(c
->reply
)) { 
2249         offset 
= c
->sentlen
; 
2253         /* fill-in the iov[] array */ 
2254         for(node 
= listFirst(c
->reply
); node
; node 
= listNextNode(node
)) { 
2255             o 
= listNodeValue(node
); 
2256             objlen 
= sdslen(o
->ptr
); 
2258             if (totwritten 
+ objlen 
- offset 
> REDIS_MAX_WRITE_PER_EVENT
) 
2261             if(ion 
== REDIS_WRITEV_IOVEC_COUNT
) 
2262                 break; /* no more iovecs */ 
2264             iov
[ion
].iov_base 
= ((char*)o
->ptr
) + offset
; 
2265             iov
[ion
].iov_len 
= objlen 
- offset
; 
2266             willwrite 
+= objlen 
- offset
; 
2267             offset 
= 0; /* just for the first item */ 
2274         /* write all collected blocks at once */ 
2275         if((nwritten 
= writev(fd
, iov
, ion
)) < 0) { 
2276             if (errno 
!= EAGAIN
) { 
2277                 redisLog(REDIS_VERBOSE
, 
2278                          "Error writing to client: %s", strerror(errno
)); 
2285         totwritten 
+= nwritten
; 
2286         offset 
= c
->sentlen
; 
2288         /* remove written robjs from c->reply */ 
2289         while (nwritten 
&& listLength(c
->reply
)) { 
2290             o 
= listNodeValue(listFirst(c
->reply
)); 
2291             objlen 
= sdslen(o
->ptr
); 
2293             if(nwritten 
>= objlen 
- offset
) { 
2294                 listDelNode(c
->reply
, listFirst(c
->reply
)); 
2295                 nwritten 
-= objlen 
- offset
; 
2299                 c
->sentlen 
+= nwritten
; 
2307         c
->lastinteraction 
= time(NULL
); 
2309     if (listLength(c
->reply
) == 0) { 
2311         aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
); 
2315 static int qsortRedisCommands(const void *r1
, const void *r2
) { 
2317         ((struct redisCommand
*)r1
)->name
, 
2318         ((struct redisCommand
*)r2
)->name
); 
2321 static void sortCommandTable() { 
2322     /* Copy and sort the read-only version of the command table */ 
2323     commandTable 
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
)); 
2324     memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
)); 
2326         sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
), 
2327         sizeof(struct redisCommand
),qsortRedisCommands
); 
2330 static struct redisCommand 
*lookupCommand(char *name
) { 
2331     struct redisCommand tmp 
= {name
,NULL
,0,0,NULL
,0,0,0}; 
2335         sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
), 
2336         sizeof(struct redisCommand
), 
2337         qsortRedisCommands
); 
2340 /* resetClient prepares the client to process the next command */ 
2341 static void resetClient(redisClient 
*c
) { 
2347 /* Call() is the core of Redis execution of a command */ 
2348 static void call(redisClient 
*c
, struct redisCommand 
*cmd
) { 
2351     dirty 
= server
.dirty
; 
2353     dirty 
= server
.dirty
-dirty
; 
2355     if (server
.appendonly 
&& dirty
) 
2356         feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
); 
2357     if ((dirty 
|| cmd
->flags 
& REDIS_CMD_FORCE_REPLICATION
) && 
2358         listLength(server
.slaves
)) 
2359         replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
); 
2360     if (listLength(server
.monitors
)) 
2361         replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
); 
2362     server
.stat_numcommands
++; 
2365 /* If this function gets called we already read a whole 
2366  * command, arguments are in the client argv/argc fields. 
2367  * processCommand() execute the command or prepare the 
2368  * server for a bulk read from the client. 
2370  * If 1 is returned the client is still alive and valid and 
2371  * other operations can be performed by the caller. Otherwise 
2372  * if 0 is returned the client was destroyed (i.e. after QUIT). */ 
2373 static int processCommand(redisClient 
*c
) { 
2374     struct redisCommand 
*cmd
; 
2376     /* Free some memory if needed (maxmemory setting) */ 
2377     if (server
.maxmemory
) freeMemoryIfNeeded(); 
2379     /* Handle the multi bulk command type. This is an alternative protocol 
2380      * supported by Redis in order to receive commands that are composed of 
2381      * multiple binary-safe "bulk" arguments. The latency of processing is 
2382      * a bit higher but this allows things like multi-sets, so if this 
2383      * protocol is used only for MSET and similar commands this is a big win. */ 
2384     if (c
->multibulk 
== 0 && c
->argc 
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') { 
2385         c
->multibulk 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2386         if (c
->multibulk 
<= 0) { 
2390             decrRefCount(c
->argv
[c
->argc
-1]); 
2394     } else if (c
->multibulk
) { 
2395         if (c
->bulklen 
== -1) { 
2396             if (((char*)c
->argv
[0]->ptr
)[0] != '$') { 
2397                 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n")); 
2401                 int bulklen 
= atoi(((char*)c
->argv
[0]->ptr
)+1); 
2402                 decrRefCount(c
->argv
[0]); 
2403                 if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2405                     addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2410                 c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2414             c
->mbargv 
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1)); 
2415             c
->mbargv
[c
->mbargc
] = c
->argv
[0]; 
2419             if (c
->multibulk 
== 0) { 
2423                 /* Here we need to swap the multi-bulk argc/argv with the 
2424                  * normal argc/argv of the client structure. */ 
2426                 c
->argv 
= c
->mbargv
; 
2427                 c
->mbargv 
= auxargv
; 
2430                 c
->argc 
= c
->mbargc
; 
2431                 c
->mbargc 
= auxargc
; 
2433                 /* We need to set bulklen to something different than -1 
2434                  * in order for the code below to process the command without 
2435                  * to try to read the last argument of a bulk command as 
2436                  * a special argument. */ 
2438                 /* continue below and process the command */ 
2445     /* -- end of multi bulk commands processing -- */ 
2447     /* The QUIT command is handled as a special case. Normal command 
2448      * procs are unable to close the client connection safely */ 
2449     if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) { 
2454     /* Now lookup the command and check ASAP about trivial error conditions 
2455      * such wrong arity, bad command name and so forth. */ 
2456     cmd 
= lookupCommand(c
->argv
[0]->ptr
); 
2459             sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n", 
2460                 (char*)c
->argv
[0]->ptr
)); 
2463     } else if ((cmd
->arity 
> 0 && cmd
->arity 
!= c
->argc
) || 
2464                (c
->argc 
< -cmd
->arity
)) { 
2466             sdscatprintf(sdsempty(), 
2467                 "-ERR wrong number of arguments for '%s' command\r\n", 
2471     } else if (cmd
->flags 
& REDIS_CMD_BULK 
&& c
->bulklen 
== -1) { 
2472         /* This is a bulk command, we have to read the last argument yet. */ 
2473         int bulklen 
= atoi(c
->argv
[c
->argc
-1]->ptr
); 
2475         decrRefCount(c
->argv
[c
->argc
-1]); 
2476         if (bulklen 
< 0 || bulklen 
> 1024*1024*1024) { 
2478             addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n")); 
2483         c
->bulklen 
= bulklen
+2; /* add two bytes for CR+LF */ 
2484         /* It is possible that the bulk read is already in the 
2485          * buffer. Check this condition and handle it accordingly. 
2486          * This is just a fast path, alternative to call processInputBuffer(). 
2487          * It's a good idea since the code is small and this condition 
2488          * happens most of the times. */ 
2489         if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) { 
2490             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2492             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2494             /* Otherwise return... there is to read the last argument 
2495              * from the socket. */ 
2499     /* Let's try to encode the bulk object to save space. */ 
2500     if (cmd
->flags 
& REDIS_CMD_BULK
) 
2501         c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]); 
2503     /* Check if the user is authenticated */ 
2504     if (server
.requirepass 
&& !c
->authenticated 
&& cmd
->proc 
!= authCommand
) { 
2505         addReplySds(c
,sdsnew("-ERR operation not permitted\r\n")); 
2510     /* Handle the maxmemory directive */ 
2511     if (server
.maxmemory 
&& (cmd
->flags 
& REDIS_CMD_DENYOOM
) && 
2512         zmalloc_used_memory() > server
.maxmemory
) 
2514         addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n")); 
2519     /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ 
2520     if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0) 
2522         cmd
->proc 
!= subscribeCommand 
&& cmd
->proc 
!= unsubscribeCommand 
&& 
2523         cmd
->proc 
!= psubscribeCommand 
&& cmd
->proc 
!= punsubscribeCommand
) { 
2524         addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n")); 
2529     /* Exec the command */ 
2530     if (c
->flags 
& REDIS_MULTI 
&& 
2531         cmd
->proc 
!= execCommand 
&& cmd
->proc 
!= discardCommand 
&& 
2532         cmd
->proc 
!= multiCommand 
&& cmd
->proc 
!= watchCommand
) 
2534         queueMultiCommand(c
,cmd
); 
2535         addReply(c
,shared
.queued
); 
2537         if (server
.vm_enabled 
&& server
.vm_max_threads 
> 0 && 
2538             blockClientOnSwappedKeys(c
,cmd
)) return 1; 
2542     /* Prepare the client for the next command */ 
2547 static void replicationFeedSlaves(list 
*slaves
, int dictid
, robj 
**argv
, int argc
) { 
2552     /* We need 1+(ARGS*3) objects since commands are using the new protocol 
2553      * and we one 1 object for the first "*<count>\r\n" multibulk count, then 
2554      * for every additional object we have "$<count>\r\n" + object + "\r\n". */ 
2555     robj 
*static_outv
[REDIS_STATIC_ARGS
*3+1]; 
2558     if (argc 
<= REDIS_STATIC_ARGS
) { 
2561         outv 
= zmalloc(sizeof(robj
*)*(argc
*3+1)); 
2564     lenobj 
= createObject(REDIS_STRING
, 
2565             sdscatprintf(sdsempty(), "*%d\r\n", argc
)); 
2566     lenobj
->refcount 
= 0; 
2567     outv
[outc
++] = lenobj
; 
2568     for (j 
= 0; j 
< argc
; j
++) { 
2569         lenobj 
= createObject(REDIS_STRING
, 
2570             sdscatprintf(sdsempty(),"$%lu\r\n", 
2571                 (unsigned long) stringObjectLen(argv
[j
]))); 
2572         lenobj
->refcount 
= 0; 
2573         outv
[outc
++] = lenobj
; 
2574         outv
[outc
++] = argv
[j
]; 
2575         outv
[outc
++] = shared
.crlf
; 
2578     /* Increment all the refcounts at start and decrement at end in order to 
2579      * be sure to free objects if there is no slave in a replication state 
2580      * able to be feed with commands */ 
2581     for (j 
= 0; j 
< outc
; j
++) incrRefCount(outv
[j
]); 
2582     listRewind(slaves
,&li
); 
2583     while((ln 
= listNext(&li
))) { 
2584         redisClient 
*slave 
= ln
->value
; 
2586         /* Don't feed slaves that are still waiting for BGSAVE to start */ 
2587         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) continue; 
2589         /* Feed all the other slaves, MONITORs and so on */ 
2590         if (slave
->slaveseldb 
!= dictid
) { 
2594             case 0: selectcmd 
= shared
.select0
; break; 
2595             case 1: selectcmd 
= shared
.select1
; break; 
2596             case 2: selectcmd 
= shared
.select2
; break; 
2597             case 3: selectcmd 
= shared
.select3
; break; 
2598             case 4: selectcmd 
= shared
.select4
; break; 
2599             case 5: selectcmd 
= shared
.select5
; break; 
2600             case 6: selectcmd 
= shared
.select6
; break; 
2601             case 7: selectcmd 
= shared
.select7
; break; 
2602             case 8: selectcmd 
= shared
.select8
; break; 
2603             case 9: selectcmd 
= shared
.select9
; break; 
2605                 selectcmd 
= createObject(REDIS_STRING
, 
2606                     sdscatprintf(sdsempty(),"select %d\r\n",dictid
)); 
2607                 selectcmd
->refcount 
= 0; 
2610             addReply(slave
,selectcmd
); 
2611             slave
->slaveseldb 
= dictid
; 
2613         for (j 
= 0; j 
< outc
; j
++) addReply(slave
,outv
[j
]); 
2615     for (j 
= 0; j 
< outc
; j
++) decrRefCount(outv
[j
]); 
2616     if (outv 
!= static_outv
) zfree(outv
); 
2619 static sds 
sdscatrepr(sds s
, char *p
, size_t len
) { 
2620     s 
= sdscatlen(s
,"\"",1); 
2625             s 
= sdscatprintf(s
,"\\%c",*p
); 
2627         case '\n': s 
= sdscatlen(s
,"\\n",1); break; 
2628         case '\r': s 
= sdscatlen(s
,"\\r",1); break; 
2629         case '\t': s 
= sdscatlen(s
,"\\t",1); break; 
2630         case '\a': s 
= sdscatlen(s
,"\\a",1); break; 
2631         case '\b': s 
= sdscatlen(s
,"\\b",1); break; 
2634                 s 
= sdscatprintf(s
,"%c",*p
); 
2636                 s 
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
); 
2641     return sdscatlen(s
,"\"",1); 
2644 static void replicationFeedMonitors(list 
*monitors
, int dictid
, robj 
**argv
, int argc
) { 
2648     sds cmdrepr 
= sdsnew("+"); 
2652     gettimeofday(&tv
,NULL
); 
2653     cmdrepr 
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
); 
2654     if (dictid 
!= 0) cmdrepr 
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
); 
2656     for (j 
= 0; j 
< argc
; j
++) { 
2657         if (argv
[j
]->encoding 
== REDIS_ENCODING_INT
) { 
2658             cmdrepr 
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
); 
2660             cmdrepr 
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
, 
2661                         sdslen(argv
[j
]->ptr
)); 
2664             cmdrepr 
= sdscatlen(cmdrepr
," ",1); 
2666     cmdrepr 
= sdscatlen(cmdrepr
,"\r\n",2); 
2667     cmdobj 
= createObject(REDIS_STRING
,cmdrepr
); 
2669     listRewind(monitors
,&li
); 
2670     while((ln 
= listNext(&li
))) { 
2671         redisClient 
*monitor 
= ln
->value
; 
2672         addReply(monitor
,cmdobj
); 
2674     decrRefCount(cmdobj
); 
2677 static void processInputBuffer(redisClient 
*c
) { 
2679     /* Before to process the input buffer, make sure the client is not 
2680      * waitig for a blocking operation such as BLPOP. Note that the first 
2681      * iteration the client is never blocked, otherwise the processInputBuffer 
2682      * would not be called at all, but after the execution of the first commands 
2683      * in the input buffer the client may be blocked, and the "goto again" 
2684      * will try to reiterate. The following line will make it return asap. */ 
2685     if (c
->flags 
& REDIS_BLOCKED 
|| c
->flags 
& REDIS_IO_WAIT
) return; 
2686     if (c
->bulklen 
== -1) { 
2687         /* Read the first line of the query */ 
2688         char *p 
= strchr(c
->querybuf
,'\n'); 
2695             query 
= c
->querybuf
; 
2696             c
->querybuf 
= sdsempty(); 
2697             querylen 
= 1+(p
-(query
)); 
2698             if (sdslen(query
) > querylen
) { 
2699                 /* leave data after the first line of the query in the buffer */ 
2700                 c
->querybuf 
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
); 
2702             *p 
= '\0'; /* remove "\n" */ 
2703             if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */ 
2704             sdsupdatelen(query
); 
2706             /* Now we can split the query in arguments */ 
2707             argv 
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
); 
2710             if (c
->argv
) zfree(c
->argv
); 
2711             c
->argv 
= zmalloc(sizeof(robj
*)*argc
); 
2713             for (j 
= 0; j 
< argc
; j
++) { 
2714                 if (sdslen(argv
[j
])) { 
2715                     c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]); 
2723                 /* Execute the command. If the client is still valid 
2724                  * after processCommand() return and there is something 
2725                  * on the query buffer try to process the next command. */ 
2726                 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2728                 /* Nothing to process, argc == 0. Just process the query 
2729                  * buffer if it's not empty or return to the caller */ 
2730                 if (sdslen(c
->querybuf
)) goto again
; 
2733         } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) { 
2734             redisLog(REDIS_VERBOSE
, "Client protocol error"); 
2739         /* Bulk read handling. Note that if we are at this point 
2740            the client already sent a command terminated with a newline, 
2741            we are reading the bulk data that is actually the last 
2742            argument of the command. */ 
2743         int qbl 
= sdslen(c
->querybuf
); 
2745         if (c
->bulklen 
<= qbl
) { 
2746             /* Copy everything but the final CRLF as final argument */ 
2747             c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2); 
2749             c
->querybuf 
= sdsrange(c
->querybuf
,c
->bulklen
,-1); 
2750             /* Process the command. If the client is still valid after 
2751              * the processing and there is more data in the buffer 
2752              * try to parse it. */ 
2753             if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
; 
2759 static void readQueryFromClient(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2760     redisClient 
*c 
= (redisClient
*) privdata
; 
2761     char buf
[REDIS_IOBUF_LEN
]; 
2764     REDIS_NOTUSED(mask
); 
2766     nread 
= read(fd
, buf
, REDIS_IOBUF_LEN
); 
2768         if (errno 
== EAGAIN
) { 
2771             redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
)); 
2775     } else if (nread 
== 0) { 
2776         redisLog(REDIS_VERBOSE
, "Client closed connection"); 
2781         c
->querybuf 
= sdscatlen(c
->querybuf
, buf
, nread
); 
2782         c
->lastinteraction 
= time(NULL
); 
2786     processInputBuffer(c
); 
2789 static int selectDb(redisClient 
*c
, int id
) { 
2790     if (id 
< 0 || id 
>= server
.dbnum
) 
2792     c
->db 
= &server
.db
[id
]; 
2796 static void *dupClientReplyValue(void *o
) { 
2797     incrRefCount((robj
*)o
); 
2801 static int listMatchObjects(void *a
, void *b
) { 
2802     return equalStringObjects(a
,b
); 
2805 static redisClient 
*createClient(int fd
) { 
2806     redisClient 
*c 
= zmalloc(sizeof(*c
)); 
2808     anetNonBlock(NULL
,fd
); 
2809     anetTcpNoDelay(NULL
,fd
); 
2810     if (!c
) return NULL
; 
2813     c
->querybuf 
= sdsempty(); 
2822     c
->lastinteraction 
= time(NULL
); 
2823     c
->authenticated 
= 0; 
2824     c
->replstate 
= REDIS_REPL_NONE
; 
2825     c
->reply 
= listCreate(); 
2826     listSetFreeMethod(c
->reply
,decrRefCount
); 
2827     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
2828     c
->blocking_keys 
= NULL
; 
2829     c
->blocking_keys_num 
= 0; 
2830     c
->io_keys 
= listCreate(); 
2831     c
->watched_keys 
= listCreate(); 
2832     listSetFreeMethod(c
->io_keys
,decrRefCount
); 
2833     c
->pubsub_channels 
= dictCreate(&setDictType
,NULL
); 
2834     c
->pubsub_patterns 
= listCreate(); 
2835     listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
); 
2836     listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
); 
2837     if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
, 
2838         readQueryFromClient
, c
) == AE_ERR
) { 
2842     listAddNodeTail(server
.clients
,c
); 
2843     initClientMultiState(c
); 
2847 static void addReply(redisClient 
*c
, robj 
*obj
) { 
2848     if (listLength(c
->reply
) == 0 && 
2849         (c
->replstate 
== REDIS_REPL_NONE 
|| 
2850          c
->replstate 
== REDIS_REPL_ONLINE
) && 
2851         aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
, 
2852         sendReplyToClient
, c
) == AE_ERR
) return; 
2854     if (server
.vm_enabled 
&& obj
->storage 
!= REDIS_VM_MEMORY
) { 
2855         obj 
= dupStringObject(obj
); 
2856         obj
->refcount 
= 0; /* getDecodedObject() will increment the refcount */ 
2858     listAddNodeTail(c
->reply
,getDecodedObject(obj
)); 
2861 static void addReplySds(redisClient 
*c
, sds s
) { 
2862     robj 
*o 
= createObject(REDIS_STRING
,s
); 
2867 static void addReplyDouble(redisClient 
*c
, double d
) { 
2870     snprintf(buf
,sizeof(buf
),"%.17g",d
); 
2871     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n", 
2872         (unsigned long) strlen(buf
),buf
)); 
2875 static void addReplyLongLong(redisClient 
*c
, long long ll
) { 
2880         addReply(c
,shared
.czero
); 
2882     } else if (ll 
== 1) { 
2883         addReply(c
,shared
.cone
); 
2887     len 
= ll2string(buf
+1,sizeof(buf
)-1,ll
); 
2890     addReplySds(c
,sdsnewlen(buf
,len
+3)); 
2893 static void addReplyUlong(redisClient 
*c
, unsigned long ul
) { 
2898         addReply(c
,shared
.czero
); 
2900     } else if (ul 
== 1) { 
2901         addReply(c
,shared
.cone
); 
2904     len 
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
); 
2905     addReplySds(c
,sdsnewlen(buf
,len
)); 
2908 static void addReplyBulkLen(redisClient 
*c
, robj 
*obj
) { 
2912     if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
2913         len 
= sdslen(obj
->ptr
); 
2915         long n 
= (long)obj
->ptr
; 
2917         /* Compute how many bytes will take this integer as a radix 10 string */ 
2923         while((n 
= n
/10) != 0) { 
2928     intlen 
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
); 
2929     buf
[intlen
+1] = '\r'; 
2930     buf
[intlen
+2] = '\n'; 
2931     addReplySds(c
,sdsnewlen(buf
,intlen
+3)); 
2934 static void addReplyBulk(redisClient 
*c
, robj 
*obj
) { 
2935     addReplyBulkLen(c
,obj
); 
2937     addReply(c
,shared
.crlf
); 
2940 static void addReplyBulkSds(redisClient 
*c
, sds s
) { 
2941     robj 
*o 
= createStringObject(s
, sdslen(s
)); 
2946 /* In the CONFIG command we need to add vanilla C string as bulk replies */ 
2947 static void addReplyBulkCString(redisClient 
*c
, char *s
) { 
2949         addReply(c
,shared
.nullbulk
); 
2951         robj 
*o 
= createStringObject(s
,strlen(s
)); 
2957 static void acceptHandler(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
2962     REDIS_NOTUSED(mask
); 
2963     REDIS_NOTUSED(privdata
); 
2965     cfd 
= anetAccept(server
.neterr
, fd
, cip
, &cport
); 
2966     if (cfd 
== AE_ERR
) { 
2967         redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
); 
2970     redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
); 
2971     if ((c 
= createClient(cfd
)) == NULL
) { 
2972         redisLog(REDIS_WARNING
,"Error allocating resoures for the client"); 
2973         close(cfd
); /* May be already closed, just ingore errors */ 
2976     /* If maxclient directive is set and this is one client more... close the 
2977      * connection. Note that we create the client instead to check before 
2978      * for this condition, since now the socket is already set in nonblocking 
2979      * mode and we can send an error for free using the Kernel I/O */ 
2980     if (server
.maxclients 
&& listLength(server
.clients
) > server
.maxclients
) { 
2981         char *err 
= "-ERR max number of clients reached\r\n"; 
2983         /* That's a best effort error message, don't check write errors */ 
2984         if (write(c
->fd
,err
,strlen(err
)) == -1) { 
2985             /* Nothing to do, Just to avoid the warning... */ 
2990     server
.stat_numconnections
++; 
2993 /* ======================= Redis objects implementation ===================== */ 
2995 static robj 
*createObject(int type
, void *ptr
) { 
2998     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
2999     if (listLength(server
.objfreelist
)) { 
3000         listNode 
*head 
= listFirst(server
.objfreelist
); 
3001         o 
= listNodeValue(head
); 
3002         listDelNode(server
.objfreelist
,head
); 
3003         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3005         if (server
.vm_enabled
) 
3006             pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3007         o 
= zmalloc(sizeof(*o
)); 
3010     o
->encoding 
= REDIS_ENCODING_RAW
; 
3013     if (server
.vm_enabled
) { 
3014         /* Note that this code may run in the context of an I/O thread 
3015          * and accessing server.lruclock in theory is an error 
3016          * (no locks). But in practice this is safe, and even if we read 
3017          * garbage Redis will not fail. */ 
3018         o
->lru 
= server
.lruclock
; 
3019         o
->storage 
= REDIS_VM_MEMORY
; 
3024 static robj 
*createStringObject(char *ptr
, size_t len
) { 
3025     return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
)); 
3028 static robj 
*createStringObjectFromLongLong(long long value
) { 
3030     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3031         incrRefCount(shared
.integers
[value
]); 
3032         o 
= shared
.integers
[value
]; 
3034         if (value 
>= LONG_MIN 
&& value 
<= LONG_MAX
) { 
3035             o 
= createObject(REDIS_STRING
, NULL
); 
3036             o
->encoding 
= REDIS_ENCODING_INT
; 
3037             o
->ptr 
= (void*)((long)value
); 
3039             o 
= createObject(REDIS_STRING
,sdsfromlonglong(value
)); 
3045 static robj 
*dupStringObject(robj 
*o
) { 
3046     assert(o
->encoding 
== REDIS_ENCODING_RAW
); 
3047     return createStringObject(o
->ptr
,sdslen(o
->ptr
)); 
3050 static robj 
*createListObject(void) { 
3051     list 
*l 
= listCreate(); 
3052     robj 
*o 
= createObject(REDIS_LIST
,l
); 
3053     listSetFreeMethod(l
,decrRefCount
); 
3054     o
->encoding 
= REDIS_ENCODING_LIST
; 
3058 static robj 
*createZiplistObject(void) { 
3059     unsigned char *zl 
= ziplistNew(); 
3060     robj 
*o 
= createObject(REDIS_LIST
,zl
); 
3061     o
->encoding 
= REDIS_ENCODING_ZIPLIST
; 
3065 static robj 
*createSetObject(void) { 
3066     dict 
*d 
= dictCreate(&setDictType
,NULL
); 
3067     return createObject(REDIS_SET
,d
); 
3070 static robj 
*createHashObject(void) { 
3071     /* All the Hashes start as zipmaps. Will be automatically converted 
3072      * into hash tables if there are enough elements or big elements 
3074     unsigned char *zm 
= zipmapNew(); 
3075     robj 
*o 
= createObject(REDIS_HASH
,zm
); 
3076     o
->encoding 
= REDIS_ENCODING_ZIPMAP
; 
3080 static robj 
*createZsetObject(void) { 
3081     zset 
*zs 
= zmalloc(sizeof(*zs
)); 
3083     zs
->dict 
= dictCreate(&zsetDictType
,NULL
); 
3084     zs
->zsl 
= zslCreate(); 
3085     return createObject(REDIS_ZSET
,zs
); 
3088 static void freeStringObject(robj 
*o
) { 
3089     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3094 static void freeListObject(robj 
*o
) { 
3095     switch (o
->encoding
) { 
3096     case REDIS_ENCODING_LIST
: 
3097         listRelease((list
*) o
->ptr
); 
3099     case REDIS_ENCODING_ZIPLIST
: 
3103         redisPanic("Unknown list encoding type"); 
3107 static void freeSetObject(robj 
*o
) { 
3108     dictRelease((dict
*) o
->ptr
); 
3111 static void freeZsetObject(robj 
*o
) { 
3114     dictRelease(zs
->dict
); 
3119 static void freeHashObject(robj 
*o
) { 
3120     switch (o
->encoding
) { 
3121     case REDIS_ENCODING_HT
: 
3122         dictRelease((dict
*) o
->ptr
); 
3124     case REDIS_ENCODING_ZIPMAP
: 
3128         redisPanic("Unknown hash encoding type"); 
3133 static void incrRefCount(robj 
*o
) { 
3137 static void decrRefCount(void *obj
) { 
3140     /* Object is a swapped out value, or in the process of being loaded. */ 
3141     if (server
.vm_enabled 
&& 
3142         (o
->storage 
== REDIS_VM_SWAPPED 
|| o
->storage 
== REDIS_VM_LOADING
)) 
3144         vmpointer 
*vp 
= obj
; 
3145         if (o
->storage 
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
); 
3146         vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
3147         server
.vm_stats_swapped_objects
--; 
3152     if (o
->refcount 
<= 0) redisPanic("decrRefCount against refcount <= 0"); 
3153     /* Object is in memory, or in the process of being swapped out. 
3155      * If the object is being swapped out, abort the operation on 
3156      * decrRefCount even if the refcount does not drop to 0: the object 
3157      * is referenced at least two times, as value of the key AND as 
3158      * job->val in the iojob. So if we don't invalidate the iojob, when it is 
3159      * done but the relevant key was removed in the meantime, the 
3160      * complete jobs handler will not find the key about the job and the 
3161      * assert will fail. */ 
3162     if (server
.vm_enabled 
&& o
->storage 
== REDIS_VM_SWAPPING
) 
3163         vmCancelThreadedIOJob(o
); 
3164     if (--(o
->refcount
) == 0) { 
3166         case REDIS_STRING
: freeStringObject(o
); break; 
3167         case REDIS_LIST
: freeListObject(o
); break; 
3168         case REDIS_SET
: freeSetObject(o
); break; 
3169         case REDIS_ZSET
: freeZsetObject(o
); break; 
3170         case REDIS_HASH
: freeHashObject(o
); break; 
3171         default: redisPanic("Unknown object type"); break; 
3173         if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
3174         if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX 
|| 
3175             !listAddNodeHead(server
.objfreelist
,o
)) 
3177         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
3181 static int checkType(redisClient 
*c
, robj 
*o
, int type
) { 
3182     if (o
->type 
!= type
) { 
3183         addReply(c
,shared
.wrongtypeerr
); 
3189 /* Check if the nul-terminated string 's' can be represented by a long 
3190  * (that is, is a number that fits into long without any other space or 
3191  * character before or after the digits). 
3193  * If so, the function returns REDIS_OK and *longval is set to the value 
3194  * of the number. Otherwise REDIS_ERR is returned */ 
3195 static int isStringRepresentableAsLong(sds s
, long *longval
) { 
3196     char buf
[32], *endptr
; 
3200     value 
= strtol(s
, &endptr
, 10); 
3201     if (endptr
[0] != '\0') return REDIS_ERR
; 
3202     slen 
= ll2string(buf
,32,value
); 
3204     /* If the number converted back into a string is not identical 
3205      * then it's not possible to encode the string as integer */ 
3206     if (sdslen(s
) != (unsigned)slen 
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
; 
3207     if (longval
) *longval 
= value
; 
3211 /* Try to encode a string object in order to save space */ 
3212 static robj 
*tryObjectEncoding(robj 
*o
) { 
3216     if (o
->encoding 
!= REDIS_ENCODING_RAW
) 
3217         return o
; /* Already encoded */ 
3219     /* It's not safe to encode shared objects: shared objects can be shared 
3220      * everywhere in the "object space" of Redis. Encoded objects can only 
3221      * appear as "values" (and not, for instance, as keys) */ 
3222      if (o
->refcount 
> 1) return o
; 
3224     /* Currently we try to encode only strings */ 
3225     redisAssert(o
->type 
== REDIS_STRING
); 
3227     /* Check if we can represent this string as a long integer */ 
3228     if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
; 
3230     /* Ok, this object can be encoded */ 
3231     if (value 
>= 0 && value 
< REDIS_SHARED_INTEGERS
) { 
3233         incrRefCount(shared
.integers
[value
]); 
3234         return shared
.integers
[value
]; 
3236         o
->encoding 
= REDIS_ENCODING_INT
; 
3238         o
->ptr 
= (void*) value
; 
3243 /* Get a decoded version of an encoded object (returned as a new object). 
3244  * If the object is already raw-encoded just increment the ref count. */ 
3245 static robj 
*getDecodedObject(robj 
*o
) { 
3248     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3252     if (o
->type 
== REDIS_STRING 
&& o
->encoding 
== REDIS_ENCODING_INT
) { 
3255         ll2string(buf
,32,(long)o
->ptr
); 
3256         dec 
= createStringObject(buf
,strlen(buf
)); 
3259         redisPanic("Unknown encoding type"); 
3263 /* Compare two string objects via strcmp() or alike. 
3264  * Note that the objects may be integer-encoded. In such a case we 
3265  * use ll2string() to get a string representation of the numbers on the stack 
3266  * and compare the strings, it's much faster than calling getDecodedObject(). 
3268  * Important note: if objects are not integer encoded, but binary-safe strings, 
3269  * sdscmp() from sds.c will apply memcmp() so this function ca be considered 
3271 static int compareStringObjects(robj 
*a
, robj 
*b
) { 
3272     redisAssert(a
->type 
== REDIS_STRING 
&& b
->type 
== REDIS_STRING
); 
3273     char bufa
[128], bufb
[128], *astr
, *bstr
; 
3276     if (a 
== b
) return 0; 
3277     if (a
->encoding 
!= REDIS_ENCODING_RAW
) { 
3278         ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
); 
3284     if (b
->encoding 
!= REDIS_ENCODING_RAW
) { 
3285         ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
); 
3291     return bothsds 
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
); 
3294 /* Equal string objects return 1 if the two objects are the same from the 
3295  * point of view of a string comparison, otherwise 0 is returned. Note that 
3296  * this function is faster then checking for (compareStringObject(a,b) == 0) 
3297  * because it can perform some more optimization. */ 
3298 static int equalStringObjects(robj 
*a
, robj 
*b
) { 
3299     if (a
->encoding 
!= REDIS_ENCODING_RAW 
&& b
->encoding 
!= REDIS_ENCODING_RAW
){ 
3300         return a
->ptr 
== b
->ptr
; 
3302         return compareStringObjects(a
,b
) == 0; 
3306 static size_t stringObjectLen(robj 
*o
) { 
3307     redisAssert(o
->type 
== REDIS_STRING
); 
3308     if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3309         return sdslen(o
->ptr
); 
3313         return ll2string(buf
,32,(long)o
->ptr
); 
3317 static int getDoubleFromObject(robj 
*o
, double *target
) { 
3324         redisAssert(o
->type 
== REDIS_STRING
); 
3325         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3326             value 
= strtod(o
->ptr
, &eptr
); 
3327             if (eptr
[0] != '\0') return REDIS_ERR
; 
3328         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3329             value 
= (long)o
->ptr
; 
3331             redisPanic("Unknown string encoding"); 
3339 static int getDoubleFromObjectOrReply(redisClient 
*c
, robj 
*o
, double *target
, const char *msg
) { 
3341     if (getDoubleFromObject(o
, &value
) != REDIS_OK
) { 
3343             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3345             addReplySds(c
, sdsnew("-ERR value is not a double\r\n")); 
3354 static int getLongLongFromObject(robj 
*o
, long long *target
) { 
3361         redisAssert(o
->type 
== REDIS_STRING
); 
3362         if (o
->encoding 
== REDIS_ENCODING_RAW
) { 
3363             value 
= strtoll(o
->ptr
, &eptr
, 10); 
3364             if (eptr
[0] != '\0') return REDIS_ERR
; 
3365         } else if (o
->encoding 
== REDIS_ENCODING_INT
) { 
3366             value 
= (long)o
->ptr
; 
3368             redisPanic("Unknown string encoding"); 
3376 static int getLongLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long long *target
, const char *msg
) { 
3378     if (getLongLongFromObject(o
, &value
) != REDIS_OK
) { 
3380             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3382             addReplySds(c
, sdsnew("-ERR value is not an integer\r\n")); 
3391 static int getLongFromObjectOrReply(redisClient 
*c
, robj 
*o
, long *target
, const char *msg
) { 
3394     if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
; 
3395     if (value 
< LONG_MIN 
|| value 
> LONG_MAX
) { 
3397             addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
)); 
3399             addReplySds(c
, sdsnew("-ERR value is out of range\r\n")); 
3408 /* =========================== Keyspace access API ========================== */ 
3410 static robj 
*lookupKey(redisDb 
*db
, robj 
*key
) { 
3411     dictEntry 
*de 
= dictFind(db
->dict
,key
->ptr
); 
3413         robj 
*val 
= dictGetEntryVal(de
); 
3415         if (server
.vm_enabled
) { 
3416             if (val
->storage 
== REDIS_VM_MEMORY 
|| 
3417                 val
->storage 
== REDIS_VM_SWAPPING
) 
3419                 /* If we were swapping the object out, cancel the operation */ 
3420                 if (val
->storage 
== REDIS_VM_SWAPPING
) 
3421                     vmCancelThreadedIOJob(val
); 
3422                 /* Update the access time for the aging algorithm. */ 
3423                 val
->lru 
= server
.lruclock
; 
3425                 int notify 
= (val
->storage 
== REDIS_VM_LOADING
); 
3427                 /* Our value was swapped on disk. Bring it at home. */ 
3428                 redisAssert(val
->type 
== REDIS_VMPOINTER
); 
3429                 val 
= vmLoadObject(val
); 
3430                 dictGetEntryVal(de
) = val
; 
3432                 /* Clients blocked by the VM subsystem may be waiting for 
3434                 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
); 
3443 static robj 
*lookupKeyRead(redisDb 
*db
, robj 
*key
) { 
3444     expireIfNeeded(db
,key
); 
3445     return lookupKey(db
,key
); 
3448 static robj 
*lookupKeyWrite(redisDb 
*db
, robj 
*key
) { 
3449     deleteIfVolatile(db
,key
); 
3450     touchWatchedKey(db
,key
); 
3451     return lookupKey(db
,key
); 
3454 static robj 
*lookupKeyReadOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3455     robj 
*o 
= lookupKeyRead(c
->db
, key
); 
3456     if (!o
) addReply(c
,reply
); 
3460 static robj 
*lookupKeyWriteOrReply(redisClient 
*c
, robj 
*key
, robj 
*reply
) { 
3461     robj 
*o 
= lookupKeyWrite(c
->db
, key
); 
3462     if (!o
) addReply(c
,reply
); 
3466 /* Add the key to the DB. If the key already exists REDIS_ERR is returned, 
3467  * otherwise REDIS_OK is returned, and the caller should increment the 
3468  * refcount of 'val'. */ 
3469 static int dbAdd(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
3470     /* Perform a lookup before adding the key, as we need to copy the 
3472     if (dictFind(db
->dict
, key
->ptr
) != NULL
) { 
3475         sds copy 
= sdsdup(key
->ptr
); 
3476         dictAdd(db
->dict
, copy
, val
); 
3481 /* If the key does not exist, this is just like dbAdd(). Otherwise 
3482  * the value associated to the key is replaced with the new one. 
3484  * On update (key already existed) 0 is returned. Otherwise 1. */ 
3485 static int dbReplace(redisDb 
*db
, robj 
*key
, robj 
*val
) { 
3486     if (dictFind(db
->dict
,key
->ptr
) == NULL
) { 
3487         sds copy 
= sdsdup(key
->ptr
); 
3488         dictAdd(db
->dict
, copy
, val
); 
3491         dictReplace(db
->dict
, key
->ptr
, val
); 
3496 static int dbExists(redisDb 
*db
, robj 
*key
) { 
3497     return dictFind(db
->dict
,key
->ptr
) != NULL
; 
3500 /* Return a random key, in form of a Redis object. 
3501  * If there are no keys, NULL is returned. 
3503  * The function makes sure to return keys not already expired. */ 
3504 static robj 
*dbRandomKey(redisDb 
*db
) { 
3505     struct dictEntry 
*de
; 
3511         de 
= dictGetRandomKey(db
->dict
); 
3512         if (de 
== NULL
) return NULL
; 
3514         key 
= dictGetEntryKey(de
); 
3515         keyobj 
= createStringObject(key
,sdslen(key
)); 
3516         if (dictFind(db
->expires
,key
)) { 
3517             if (expireIfNeeded(db
,keyobj
)) { 
3518                 decrRefCount(keyobj
); 
3519                 continue; /* search for another key. This expired. */ 
3526 /* Delete a key, value, and associated expiration entry if any, from the DB */ 
3527 static int dbDelete(redisDb 
*db
, robj 
*key
) { 
3530     if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
->ptr
); 
3531     retval 
= dictDelete(db
->dict
,key
->ptr
); 
3533     return retval 
== DICT_OK
; 
3536 /*============================ RDB saving/loading =========================== */ 
/* Write the one byte object type / opcode 'type' to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte int32_t, dumped as it is laid out
 * in memory. The value is narrowed with a cast, so times that do not fit in
 * 32 bits are not preserved.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    const int32_t secs = (int32_t) t;

    if (fwrite(&secs,4,1,fp) != 0) return 0;
    return -1;
}
3549 /* check rdbLoadLen() comments for more info */ 
3550 static int rdbSaveLen(FILE *fp
, uint32_t len
) { 
3551     unsigned char buf
[2]; 
3554         /* Save a 6 bit len */ 
3555         buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6); 
3556         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3557     } else if (len 
< (1<<14)) { 
3558         /* Save a 14 bit len */ 
3559         buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6); 
3561         if (fwrite(buf
,2,1,fp
) == 0) return -1; 
3563         /* Save a 32 bit len */ 
3564         buf
[0] = (REDIS_RDB_32BITLEN
<<6); 
3565         if (fwrite(buf
,1,1,fp
) == 0) return -1; 
3567         if (fwrite(&len
,4,1,fp
) == 0) return -1; 
3572 /* Encode 'value' as an integer if possible (if integer will fit the 
3573  * supported range). If the function sucessful encoded the integer 
3574  * then the (up to 5 bytes) encoded representation is written in the 
3575  * string pointed by 'enc' and the length is returned. Otherwise 
3577 static int rdbEncodeInteger(long long value
, unsigned char *enc
) { 
3578     /* Finally check if it fits in our ranges */ 
3579     if (value 
>= -(1<<7) && value 
<= (1<<7)-1) { 
3580         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
; 
3581         enc
[1] = value
&0xFF; 
3583     } else if (value 
>= -(1<<15) && value 
<= (1<<15)-1) { 
3584         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
; 
3585         enc
[1] = value
&0xFF; 
3586         enc
[2] = (value
>>8)&0xFF; 
3588     } else if (value 
>= -((long long)1<<31) && value 
<= ((long long)1<<31)-1) { 
3589         enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
; 
3590         enc
[1] = value
&0xFF; 
3591         enc
[2] = (value
>>8)&0xFF; 
3592         enc
[3] = (value
>>16)&0xFF; 
3593         enc
[4] = (value
>>24)&0xFF; 
/* String objects in the form "2391" "-100" without any space and with a
 * range of values that can fit in an 8, 16 or 32 bit signed value can be
 * encoded as integers to save space.
 *
 * 's' points to 'len' bytes; it is not required to be NUL terminated: the
 * bytes are copied into a bounded local buffer before being parsed, so
 * nothing past s[len-1] is ever read.
 *
 * Returns the length of the encoding written to 'enc' (see
 * rdbEncodeInteger()), or 0 if the string can't be encoded as an integer,
 * in which case 'enc' is not meaningful. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    long long value;
    /* The longest encodable string is "-2147483648": 11 chars plus the
     * terminator. Anything longer can't fit the 32 bit range. */
    char *endptr, buf[32], digits[12];

    if (len == 0 || len >= sizeof(digits)) return 0;
    memcpy(digits,s,len);
    digits[len] = '\0';

    /* Check if it's possible to encode this value as a number */
    value = strtoll(digits, &endptr, 10);
    if (endptr[0] != '\0') return 0;
    ll2string(buf,32,value);

    /* If the number converted back into a string is not identical
     * then it's not possible to encode the string as integer (this rejects
     * leading spaces, '+', leading zeros, embedded NUL bytes, ...). */
    if (strlen(buf) != len || memcmp(buf,s,len)) return 0;

    return rdbEncodeInteger(value,enc);
}
3619 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) { 
3620     size_t comprlen
, outlen
; 
3624     /* We require at least four bytes compression for this to be worth it */ 
3625     if (len 
<= 4) return 0; 
3627     if ((out 
= zmalloc(outlen
+1)) == NULL
) return 0; 
3628     comprlen 
= lzf_compress(s
, len
, out
, outlen
); 
3629     if (comprlen 
== 0) { 
3633     /* Data compressed! Let's save it on disk */ 
3634     byte 
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
; 
3635     if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
; 
3636     if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
; 
3637     if (rdbSaveLen(fp
,len
) == -1) goto writeerr
; 
3638     if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
; 
3647 /* Save a string object as [len][data] on disk. If the object is a string 
3648  * representation of an integer value we try to save it in a special form */ 
3649 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) { 
3652     /* Try integer encoding */ 
3654         unsigned char buf
[5]; 
3655         if ((enclen 
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) { 
3656             if (fwrite(buf
,enclen
,1,fp
) == 0) return -1; 
3661     /* Try LZF compression - under 20 bytes it's unable to compress even 
3662      * aaaaaaaaaaaaaaaaaa so skip it */ 
3663     if (server
.rdbcompression 
&& len 
> 20) { 
3666         retval 
= rdbSaveLzfStringObject(fp
,s
,len
); 
3667         if (retval 
== -1) return -1; 
3668         if (retval 
> 0) return 0; 
3669         /* retval == 0 means data can't be compressed, save the old way */ 
3672     /* Store verbatim */ 
3673     if (rdbSaveLen(fp
,len
) == -1) return -1; 
3674     if (len 
&& fwrite(s
,len
,1,fp
) == 0) return -1; 
/* Save a long long value as either an integer encoded string (when
 * rdbEncodeInteger() can represent it) or as a plain length-prefixed string
 * holding its decimal representation.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveLongLongAsStringObject(FILE *fp, long long value) {
    unsigned char out[32];
    int n = rdbEncodeInteger(value,out);

    if (n <= 0) {
        /* Out of the encodable range: fall back to the decimal string,
         * that needs its length written first. */
        n = ll2string((char*)out,32,value);
        redisAssert(n < 32);
        if (rdbSaveLen(fp,n) == -1) return -1;
    }

    /* In both cases 'out' now holds exactly 'n' bytes to emit. */
    if (fwrite(out,n,1,fp) == 0) return -1;
    return 0;
}
3694 /* Like rdbSaveStringObjectRaw() but handle encoded objects */ 
3695 static int rdbSaveStringObject(FILE *fp
, robj 
*obj
) { 
3696     /* Avoid to decode the object, then encode it again, if the 
3697      * object is alrady integer encoded. */ 
3698     if (obj
->encoding 
== REDIS_ENCODING_INT
) { 
3699         return rdbSaveLongLongAsStringObject(fp
,(long)obj
->ptr
); 
3701         redisAssert(obj
->encoding 
== REDIS_ENCODING_RAW
); 
3702         return rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
3706 /* Save a double value. Doubles are saved as strings prefixed by an unsigned 
3707  * 8 bit integer specifying the length of the representation. 
3708  * This 8 bit integer has special values in order to specify the following 
3714 static int rdbSaveDoubleValue(FILE *fp
, double val
) { 
3715     unsigned char buf
[128]; 
3721     } else if (!isfinite(val
)) { 
3723         buf
[0] = (val 
< 0) ? 255 : 254; 
3725 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL) 
3726         /* Check if the float is in a safe range to be casted into a 
3727          * long long. We are assuming that long long is 64 bit here. 
3728          * Also we are assuming that there are no implementations around where 
3729          * double has precision < 52 bit. 
3731          * Under this assumptions we test if a double is inside an interval 
3732          * where casting to long long is safe. Then using two castings we 
3733          * make sure the decimal part is zero. If all this is true we use 
3734          * integer printing function that is much faster. */ 
3735         double min 
= -4503599627370495; /* (2^52)-1 */ 
3736         double max 
= 4503599627370496; /* -(2^52) */ 
3737         if (val 
> min 
&& val 
< max 
&& val 
== ((double)((long long)val
))) 
3738             ll2string((char*)buf
+1,sizeof(buf
),(long long)val
); 
3741             snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
); 
3742         buf
[0] = strlen((char*)buf
+1); 
3745     if (fwrite(buf
,len
,1,fp
) == 0) return -1; 
3749 /* Save a Redis object. */ 
3750 static int rdbSaveObject(FILE *fp
, robj 
*o
) { 
3751     if (o
->type 
== REDIS_STRING
) { 
3752         /* Save a string value */ 
3753         if (rdbSaveStringObject(fp
,o
) == -1) return -1; 
3754     } else if (o
->type 
== REDIS_LIST
) { 
3755         /* Save a list value */ 
3756         if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
3758             unsigned char *vstr
; 
3762             if (rdbSaveLen(fp
,ziplistLen(o
->ptr
)) == -1) return -1; 
3763             p 
= ziplistIndex(o
->ptr
,0); 
3764             while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
3766                     if (rdbSaveRawString(fp
,vstr
,vlen
) == -1) 
3769                     if (rdbSaveLongLongAsStringObject(fp
,vlong
) == -1) 
3772                 p 
= ziplistNext(o
->ptr
,p
); 
3774         } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
3775             list 
*list 
= o
->ptr
; 
3779             if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1; 
3780             listRewind(list
,&li
); 
3781             while((ln 
= listNext(&li
))) { 
3782                 robj 
*eleobj 
= listNodeValue(ln
); 
3783                 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3786             redisPanic("Unknown list encoding"); 
3788     } else if (o
->type 
== REDIS_SET
) { 
3789         /* Save a set value */ 
3791         dictIterator 
*di 
= dictGetIterator(set
); 
3794         if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1; 
3795         while((de 
= dictNext(di
)) != NULL
) { 
3796             robj 
*eleobj 
= dictGetEntryKey(de
); 
3798             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3800         dictReleaseIterator(di
); 
3801     } else if (o
->type 
== REDIS_ZSET
) { 
3802         /* Save a set value */ 
3804         dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
3807         if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1; 
3808         while((de 
= dictNext(di
)) != NULL
) { 
3809             robj 
*eleobj 
= dictGetEntryKey(de
); 
3810             double *score 
= dictGetEntryVal(de
); 
3812             if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1; 
3813             if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1; 
3815         dictReleaseIterator(di
); 
3816     } else if (o
->type 
== REDIS_HASH
) { 
3817         /* Save a hash value */ 
3818         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
3819             unsigned char *p 
= zipmapRewind(o
->ptr
); 
3820             unsigned int count 
= zipmapLen(o
->ptr
); 
3821             unsigned char *key
, *val
; 
3822             unsigned int klen
, vlen
; 
3824             if (rdbSaveLen(fp
,count
) == -1) return -1; 
3825             while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
3826                 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1; 
3827                 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1; 
3830             dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
3833             if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1; 
3834             while((de 
= dictNext(di
)) != NULL
) { 
3835                 robj 
*key 
= dictGetEntryKey(de
); 
3836                 robj 
*val 
= dictGetEntryVal(de
); 
3838                 if (rdbSaveStringObject(fp
,key
) == -1) return -1; 
3839                 if (rdbSaveStringObject(fp
,val
) == -1) return -1; 
3841             dictReleaseIterator(di
); 
3844         redisPanic("Unknown object type"); 
3849 /* Return the length the object will have on disk if saved with 
3850  * the rdbSaveObject() function. Currently we use a trick to get 
3851  * this length with very little changes to the code. In the future 
3852  * we could switch to a faster solution. */ 
3853 static off_t 
rdbSavedObjectLen(robj 
*o
, FILE *fp
) { 
3854     if (fp 
== NULL
) fp 
= server
.devnull
; 
3856     assert(rdbSaveObject(fp
,o
) != 1); 
3860 /* Return the number of pages required to save this object in the swap file */ 
3861 static off_t 
rdbSavedObjectPages(robj 
*o
, FILE *fp
) { 
3862     off_t bytes 
= rdbSavedObjectLen(o
,fp
); 
3864     return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
; 
3867 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */ 
3868 static int rdbSave(char *filename
) { 
3869     dictIterator 
*di 
= NULL
; 
3874     time_t now 
= time(NULL
); 
3876     /* Wait for I/O therads to terminate, just in case this is a 
3877      * foreground-saving, to avoid seeking the swap file descriptor at the 
3879     if (server
.vm_enabled
) 
3880         waitEmptyIOJobsQueue(); 
3882     snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid()); 
3883     fp 
= fopen(tmpfile
,"w"); 
3885         redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
)); 
3888     if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
; 
3889     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
3890         redisDb 
*db 
= server
.db
+j
; 
3892         if (dictSize(d
) == 0) continue; 
3893         di 
= dictGetIterator(d
); 
3899         /* Write the SELECT DB opcode */ 
3900         if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
; 
3901         if (rdbSaveLen(fp
,j
) == -1) goto werr
; 
3903         /* Iterate this DB writing every entry */ 
3904         while((de 
= dictNext(di
)) != NULL
) { 
3905             sds keystr 
= dictGetEntryKey(de
); 
3906             robj key
, *o 
= dictGetEntryVal(de
); 
3909             initStaticStringObject(key
,keystr
); 
3910             expiretime 
= getExpire(db
,&key
); 
3912             /* Save the expire time */ 
3913             if (expiretime 
!= -1) { 
3914                 /* If this key is already expired skip it */ 
3915                 if (expiretime 
< now
) continue; 
3916                 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
; 
3917                 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
; 
3919             /* Save the key and associated value. This requires special 
3920              * handling if the value is swapped out. */ 
3921             if (!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY 
|| 
3922                                       o
->storage 
== REDIS_VM_SWAPPING
) { 
3923                 /* Save type, key, value */ 
3924                 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
; 
3925                 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
; 
3926                 if (rdbSaveObject(fp
,o
) == -1) goto werr
; 
3928                 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */ 
3930                 /* Get a preview of the object in memory */ 
3931                 po 
= vmPreviewObject(o
); 
3932                 /* Save type, key, value */ 
3933                 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
; 
3934                 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
; 
3935                 if (rdbSaveObject(fp
,po
) == -1) goto werr
; 
3936                 /* Remove the loaded object from memory */ 
3940         dictReleaseIterator(di
); 
3943     if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
; 
3945     /* Make sure data will not remain on the OS's output buffers */ 
3950     /* Use RENAME to make sure the DB file is changed atomically only 
3951      * if the generate DB file is ok. */ 
3952     if (rename(tmpfile
,filename
) == -1) { 
3953         redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
)); 
3957     redisLog(REDIS_NOTICE
,"DB saved on disk"); 
3959     server
.lastsave 
= time(NULL
); 
3965     redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
)); 
3966     if (di
) dictReleaseIterator(di
); 
3970 static int rdbSaveBackground(char *filename
) { 
3973     if (server
.bgsavechildpid 
!= -1) return REDIS_ERR
; 
3974     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
3975     if ((childpid 
= fork()) == 0) { 
3977         if (server
.vm_enabled
) vmReopenSwapFile(); 
3979         if (rdbSave(filename
) == REDIS_OK
) { 
3986         if (childpid 
== -1) { 
3987             redisLog(REDIS_WARNING
,"Can't save in background: fork: %s", 
3991         redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
); 
3992         server
.bgsavechildpid 
= childpid
; 
3993         updateDictResizePolicy(); 
3996     return REDIS_OK
; /* unreached */ 
3999 static void rdbRemoveTempFile(pid_t childpid
) { 
4002     snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
); 
/* Read a one byte object type / opcode from 'fp'.
 * Returns the byte as a value in the 0-255 range, or -1 if nothing could
 * be read (EOF or read error). */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    if (fread(&byte,1,1,fp) != 0) return byte;
    return -1;
}
/* Read a time stored by rdbSaveTime(): a 4 byte int32_t loaded straight
 * into memory and widened to time_t.
 * Returns -1 if the four bytes could not be read; note that this is not
 * distinguishable from a stored value of -1. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t secs;

    if (fread(&secs,4,1,fp) != 0) return (time_t) secs;
    return -1;
}
4018 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top 
4019  * of this file for a description of how this are stored on disk. 
4021  * isencoded is set to 1 if the value read is not actually a length but 
4022  * an "encoding type", check the above comments for more info */ 
4023 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) { 
4024     unsigned char buf
[2]; 
4028     if (isencoded
) *isencoded 
= 0; 
4029     if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4030     type 
= (buf
[0]&0xC0)>>6; 
4031     if (type 
== REDIS_RDB_6BITLEN
) { 
4032         /* Read a 6 bit len */ 
4034     } else if (type 
== REDIS_RDB_ENCVAL
) { 
4035         /* Read a 6 bit len encoding type */ 
4036         if (isencoded
) *isencoded 
= 1; 
4038     } else if (type 
== REDIS_RDB_14BITLEN
) { 
4039         /* Read a 14 bit len */ 
4040         if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4041         return ((buf
[0]&0x3F)<<8)|buf
[1]; 
4043         /* Read a 32 bit len */ 
4044         if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
; 
4049 /* Load an integer-encoded object from file 'fp', with the specified 
4050  * encoding type 'enctype'. If encode is true the function may return 
4051  * an integer-encoded object as reply, otherwise the returned object 
4052  * will always be encoded as a raw string. */ 
4053 static robj 
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) { 
4054     unsigned char enc
[4]; 
4057     if (enctype 
== REDIS_RDB_ENC_INT8
) { 
4058         if (fread(enc
,1,1,fp
) == 0) return NULL
; 
4059         val 
= (signed char)enc
[0]; 
4060     } else if (enctype 
== REDIS_RDB_ENC_INT16
) { 
4062         if (fread(enc
,2,1,fp
) == 0) return NULL
; 
4063         v 
= enc
[0]|(enc
[1]<<8); 
4065     } else if (enctype 
== REDIS_RDB_ENC_INT32
) { 
4067         if (fread(enc
,4,1,fp
) == 0) return NULL
; 
4068         v 
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24); 
4071         val 
= 0; /* anti-warning */ 
4072         redisPanic("Unknown RDB integer encoding type"); 
4075         return createStringObjectFromLongLong(val
); 
4077         return createObject(REDIS_STRING
,sdsfromlonglong(val
)); 
4080 static robj 
*rdbLoadLzfStringObject(FILE*fp
) { 
4081     unsigned int len
, clen
; 
4082     unsigned char *c 
= NULL
; 
4085     if ((clen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4086     if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4087     if ((c 
= zmalloc(clen
)) == NULL
) goto err
; 
4088     if ((val 
= sdsnewlen(NULL
,len
)) == NULL
) goto err
; 
4089     if (fread(c
,clen
,1,fp
) == 0) goto err
; 
4090     if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
; 
4092     return createObject(REDIS_STRING
,val
); 
4099 static robj 
*rdbGenericLoadStringObject(FILE*fp
, int encode
) { 
4104     len 
= rdbLoadLen(fp
,&isencoded
); 
4107         case REDIS_RDB_ENC_INT8
: 
4108         case REDIS_RDB_ENC_INT16
: 
4109         case REDIS_RDB_ENC_INT32
: 
4110             return rdbLoadIntegerObject(fp
,len
,encode
); 
4111         case REDIS_RDB_ENC_LZF
: 
4112             return rdbLoadLzfStringObject(fp
); 
4114             redisPanic("Unknown RDB encoding type"); 
4118     if (len 
== REDIS_RDB_LENERR
) return NULL
; 
4119     val 
= sdsnewlen(NULL
,len
); 
4120     if (len 
&& fread(val
,len
,1,fp
) == 0) { 
4124     return createObject(REDIS_STRING
,val
); 
4127 static robj 
*rdbLoadStringObject(FILE *fp
) { 
4128     return rdbGenericLoadStringObject(fp
,0); 
4131 static robj 
*rdbLoadEncodedStringObject(FILE *fp
) { 
4132     return rdbGenericLoadStringObject(fp
,1); 
4135 /* For information about double serialization check rdbSaveDoubleValue() */ 
4136 static int rdbLoadDoubleValue(FILE *fp
, double *val
) { 
4140     if (fread(&len
,1,1,fp
) == 0) return -1; 
4142     case 255: *val 
= R_NegInf
; return 0; 
4143     case 254: *val 
= R_PosInf
; return 0; 
4144     case 253: *val 
= R_Nan
; return 0; 
4146         if (fread(buf
,len
,1,fp
) == 0) return -1; 
4148         sscanf(buf
, "%lg", val
); 
4153 /* Load a Redis object of the specified type from the specified file. 
4154  * On success a newly allocated object is returned, otherwise NULL. */ 
4155 static robj 
*rdbLoadObject(int type
, FILE *fp
) { 
4156     robj 
*o
, *ele
, *dec
; 
4159     redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
)); 
4160     if (type 
== REDIS_STRING
) { 
4161         /* Read string value */ 
4162         if ((o 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4163         o 
= tryObjectEncoding(o
); 
4164     } else if (type 
== REDIS_LIST
) { 
4165         /* Read list value */ 
4166         if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4168         /* Use a real list when there are too many entries */ 
4169         if (len 
> server
.list_max_ziplist_entries
) { 
4170             o 
= createListObject(); 
4172             o 
= createZiplistObject(); 
4175         /* Load every single element of the list */ 
4177             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4179             /* If we are using a ziplist and the value is too big, convert 
4180              * the object to a real list. */ 
4181             if (o
->encoding 
== REDIS_ENCODING_ZIPLIST 
&& 
4182                 ele
->encoding 
== REDIS_ENCODING_RAW 
&& 
4183                 sdslen(ele
->ptr
) > server
.list_max_ziplist_value
) 
4184                     listTypeConvert(o
,REDIS_ENCODING_LIST
); 
4186             if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4187                 dec 
= getDecodedObject(ele
); 
4188                 o
->ptr 
= ziplistPush(o
->ptr
,dec
->ptr
,sdslen(dec
->ptr
),REDIS_TAIL
); 
4192                 ele 
= tryObjectEncoding(ele
); 
4193                 listAddNodeTail(o
->ptr
,ele
); 
4196     } else if (type 
== REDIS_SET
) { 
4197         /* Read list/set value */ 
4198         if ((len 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4199         o 
= createSetObject(); 
4200         /* It's faster to expand the dict to the right size asap in order 
4201          * to avoid rehashing */ 
4202         if (len 
> DICT_HT_INITIAL_SIZE
) 
4203             dictExpand(o
->ptr
,len
); 
4204         /* Load every single element of the list/set */ 
4206             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4207             ele 
= tryObjectEncoding(ele
); 
4208             dictAdd((dict
*)o
->ptr
,ele
,NULL
); 
4210     } else if (type 
== REDIS_ZSET
) { 
4211         /* Read list/set value */ 
4215         if ((zsetlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4216         o 
= createZsetObject(); 
4218         /* Load every single element of the list/set */ 
4221             double *score 
= zmalloc(sizeof(double)); 
4223             if ((ele 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4224             ele 
= tryObjectEncoding(ele
); 
4225             if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
; 
4226             dictAdd(zs
->dict
,ele
,score
); 
4227             zslInsert(zs
->zsl
,*score
,ele
); 
4228             incrRefCount(ele
); /* added to skiplist */ 
4230     } else if (type 
== REDIS_HASH
) { 
4233         if ((hashlen 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
; 
4234         o 
= createHashObject(); 
4235         /* Too many entries? Use an hash table. */ 
4236         if (hashlen 
> server
.hash_max_zipmap_entries
) 
4237             convertToRealHash(o
); 
4238         /* Load every key/value, then set it into the zipmap or hash 
4239          * table, as needed. */ 
4243             if ((key 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4244             if ((val 
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
; 
4245             /* If we are using a zipmap and there are too big values 
4246              * the object is converted to real hash table encoding. */ 
4247             if (o
->encoding 
!= REDIS_ENCODING_HT 
&& 
4248                (sdslen(key
->ptr
) > server
.hash_max_zipmap_value 
|| 
4249                 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
)) 
4251                     convertToRealHash(o
); 
4254             if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
4255                 unsigned char *zm 
= o
->ptr
; 
4257                 zm 
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
), 
4258                                   val
->ptr
,sdslen(val
->ptr
),NULL
); 
4263                 key 
= tryObjectEncoding(key
); 
4264                 val 
= tryObjectEncoding(val
); 
4265                 dictAdd((dict
*)o
->ptr
,key
,val
); 
4269         redisPanic("Unknown object type"); 
4274 static int rdbLoad(char *filename
) { 
4277     int type
, retval
, rdbver
; 
4278     int swap_all_values 
= 0; 
4279     redisDb 
*db 
= server
.db
+0; 
4281     time_t expiretime
, now 
= time(NULL
); 
4283     fp 
= fopen(filename
,"r"); 
4284     if (!fp
) return REDIS_ERR
; 
4285     if (fread(buf
,9,1,fp
) == 0) goto eoferr
; 
4287     if (memcmp(buf
,"REDIS",5) != 0) { 
4289         redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file"); 
4292     rdbver 
= atoi(buf
+5); 
4295         redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
); 
4304         if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4305         if (type 
== REDIS_EXPIRETIME
) { 
4306             if ((expiretime 
= rdbLoadTime(fp
)) == -1) goto eoferr
; 
4307             /* We read the time so we need to read the object type again */ 
4308             if ((type 
= rdbLoadType(fp
)) == -1) goto eoferr
; 
4310         if (type 
== REDIS_EOF
) break; 
4311         /* Handle SELECT DB opcode as a special case */ 
4312         if (type 
== REDIS_SELECTDB
) { 
4313             if ((dbid 
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) 
4315             if (dbid 
>= (unsigned)server
.dbnum
) { 
4316                 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
); 
4319             db 
= server
.db
+dbid
; 
4323         if ((key 
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
; 
4325         if ((val 
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
; 
4326         /* Check if the key already expired */ 
4327         if (expiretime 
!= -1 && expiretime 
< now
) { 
4332         /* Add the new object in the hash table */ 
4333         retval 
= dbAdd(db
,key
,val
); 
4334         if (retval 
== REDIS_ERR
) { 
4335             redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
); 
4338         /* Set the expire time if needed */ 
4339         if (expiretime 
!= -1) setExpire(db
,key
,expiretime
); 
4341         /* Handle swapping while loading big datasets when VM is on */ 
4343         /* If we detecter we are hopeless about fitting something in memory 
4344          * we just swap every new key on disk. Directly... 
4345          * Note that's important to check for this condition before resorting 
4346          * to random sampling, otherwise we may try to swap already 
4348         if (swap_all_values
) { 
4349             dictEntry 
*de 
= dictFind(db
->dict
,key
->ptr
); 
4351             /* de may be NULL since the key already expired */ 
4354                 val 
= dictGetEntryVal(de
); 
4356                 if (val
->refcount 
== 1 && 
4357                     (vp 
= vmSwapObjectBlocking(val
)) != NULL
) 
4358                     dictGetEntryVal(de
) = vp
; 
4365         /* Flush data on disk once 32 MB of additional RAM are used... */ 
4367         if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32) 
4370         /* If we have still some hope of having some value fitting memory 
4371          * then we try random sampling. */ 
4372         if (!swap_all_values 
&& server
.vm_enabled 
&& force_swapout
) { 
4373             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
4374                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
4376             if (zmalloc_used_memory() > server
.vm_max_memory
) 
4377                 swap_all_values 
= 1; /* We are already using too much mem */ 
4383 eoferr
: /* unexpected end of file is handled here with a fatal exit */ 
4384     redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now."); 
4386     return REDIS_ERR
; /* Just to avoid warning */ 
4389 /*================================== Shutdown =============================== */ 
4390 static int prepareForShutdown() { 
4391     redisLog(REDIS_WARNING
,"User requested shutdown, saving DB..."); 
4392     /* Kill the saving child if there is a background saving in progress. 
4393        We want to avoid race conditions, for instance our saving child may 
4394        overwrite the synchronous saving did by SHUTDOWN. */ 
4395     if (server
.bgsavechildpid 
!= -1) { 
4396         redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!"); 
4397         kill(server
.bgsavechildpid
,SIGKILL
); 
4398         rdbRemoveTempFile(server
.bgsavechildpid
); 
4400     if (server
.appendonly
) { 
4401         /* Append only file: fsync() the AOF and exit */ 
4402         aof_fsync(server
.appendfd
); 
4403         if (server
.vm_enabled
) unlink(server
.vm_swap_file
); 
4405         /* Snapshotting. Perform a SYNC SAVE and exit */ 
4406         if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4407             if (server
.daemonize
) 
4408                 unlink(server
.pidfile
); 
4409             redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory()); 
4411             /* Ooops.. error saving! The best we can do is to continue 
4412              * operating. Note that if there was a background saving process, 
4413              * in the next cron() Redis will be notified that the background 
4414              * saving aborted, handling special stuff like slaves pending for 
4415              * synchronization... */ 
4416             redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit"); 
4420     redisLog(REDIS_WARNING
,"Server exit now, bye bye..."); 
4424 /*================================== Commands =============================== */ 
4426 static void authCommand(redisClient 
*c
) { 
4427     if (!server
.requirepass 
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) { 
4428       c
->authenticated 
= 1; 
4429       addReply(c
,shared
.ok
); 
4431       c
->authenticated 
= 0; 
4432       addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n")); 
4436 static void pingCommand(redisClient 
*c
) { 
4437     addReply(c
,shared
.pong
); 
4440 static void echoCommand(redisClient 
*c
) { 
4441     addReplyBulk(c
,c
->argv
[1]); 
4444 /*=================================== Strings =============================== */ 
4446 static void setGenericCommand(redisClient 
*c
, int nx
, robj 
*key
, robj 
*val
, robj 
*expire
) { 
4448     long seconds 
= 0; /* initialized to avoid an harmness warning */ 
4451         if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
) 
4454             addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n")); 
4459     touchWatchedKey(c
->db
,key
); 
4460     if (nx
) deleteIfVolatile(c
->db
,key
); 
4461     retval 
= dbAdd(c
->db
,key
,val
); 
4462     if (retval 
== REDIS_ERR
) { 
4464             dbReplace(c
->db
,key
,val
); 
4467             addReply(c
,shared
.czero
); 
4474     removeExpire(c
->db
,key
); 
4475     if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
); 
4476     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4479 static void setCommand(redisClient 
*c
) { 
4480     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
); 
4483 static void setnxCommand(redisClient 
*c
) { 
4484     setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
); 
4487 static void setexCommand(redisClient 
*c
) { 
4488     setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]); 
4491 static int getGenericCommand(redisClient 
*c
) { 
4494     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
) 
4497     if (o
->type 
!= REDIS_STRING
) { 
4498         addReply(c
,shared
.wrongtypeerr
); 
4506 static void getCommand(redisClient 
*c
) { 
4507     getGenericCommand(c
); 
4510 static void getsetCommand(redisClient 
*c
) { 
4511     if (getGenericCommand(c
) == REDIS_ERR
) return; 
4512     dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]); 
4513     incrRefCount(c
->argv
[2]); 
4515     removeExpire(c
->db
,c
->argv
[1]); 
4518 static void mgetCommand(redisClient 
*c
) { 
4521     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1)); 
4522     for (j 
= 1; j 
< c
->argc
; j
++) { 
4523         robj 
*o 
= lookupKeyRead(c
->db
,c
->argv
[j
]); 
4525             addReply(c
,shared
.nullbulk
); 
4527             if (o
->type 
!= REDIS_STRING
) { 
4528                 addReply(c
,shared
.nullbulk
); 
4536 static void msetGenericCommand(redisClient 
*c
, int nx
) { 
4537     int j
, busykeys 
= 0; 
4539     if ((c
->argc 
% 2) == 0) { 
4540         addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n")); 
4543     /* Handle the NX flag. The MSETNX semantic is to return zero and don't 
4544      * set nothing at all if at least one already key exists. */ 
4546         for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4547             if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) { 
4553         addReply(c
, shared
.czero
); 
4557     for (j 
= 1; j 
< c
->argc
; j 
+= 2) { 
4558         c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]); 
4559         dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]); 
4560         incrRefCount(c
->argv
[j
+1]); 
4561         removeExpire(c
->db
,c
->argv
[j
]); 
4563     server
.dirty 
+= (c
->argc
-1)/2; 
4564     addReply(c
, nx 
? shared
.cone 
: shared
.ok
); 
4567 static void msetCommand(redisClient 
*c
) { 
4568     msetGenericCommand(c
,0); 
4571 static void msetnxCommand(redisClient 
*c
) { 
4572     msetGenericCommand(c
,1); 
4575 static void incrDecrCommand(redisClient 
*c
, long long incr
) { 
4579     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4580     if (o 
!= NULL 
&& checkType(c
,o
,REDIS_STRING
)) return; 
4581     if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return; 
4584     o 
= createStringObjectFromLongLong(value
); 
4585     dbReplace(c
->db
,c
->argv
[1],o
); 
4587     addReply(c
,shared
.colon
); 
4589     addReply(c
,shared
.crlf
); 
4592 static void incrCommand(redisClient 
*c
) { 
4593     incrDecrCommand(c
,1); 
4596 static void decrCommand(redisClient 
*c
) { 
4597     incrDecrCommand(c
,-1); 
4600 static void incrbyCommand(redisClient 
*c
) { 
4603     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4604     incrDecrCommand(c
,incr
); 
4607 static void decrbyCommand(redisClient 
*c
) { 
4610     if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return; 
4611     incrDecrCommand(c
,-incr
); 
4614 static void appendCommand(redisClient 
*c
) { 
4619     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4621         /* Create the key */ 
4622         retval 
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]); 
4623         incrRefCount(c
->argv
[2]); 
4624         totlen 
= stringObjectLen(c
->argv
[2]); 
4626         if (o
->type 
!= REDIS_STRING
) { 
4627             addReply(c
,shared
.wrongtypeerr
); 
4630         /* If the object is specially encoded or shared we have to make 
4632         if (o
->refcount 
!= 1 || o
->encoding 
!= REDIS_ENCODING_RAW
) { 
4633             robj 
*decoded 
= getDecodedObject(o
); 
4635             o 
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
)); 
4636             decrRefCount(decoded
); 
4637             dbReplace(c
->db
,c
->argv
[1],o
); 
4640         if (c
->argv
[2]->encoding 
== REDIS_ENCODING_RAW
) { 
4641             o
->ptr 
= sdscatlen(o
->ptr
, 
4642                 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
)); 
4644             o
->ptr 
= sdscatprintf(o
->ptr
, "%ld", 
4645                 (unsigned long) c
->argv
[2]->ptr
); 
4647         totlen 
= sdslen(o
->ptr
); 
4650     addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
)); 
4653 static void substrCommand(redisClient 
*c
) { 
4655     long start 
= atoi(c
->argv
[2]->ptr
); 
4656     long end 
= atoi(c
->argv
[3]->ptr
); 
4657     size_t rangelen
, strlen
; 
4660     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
4661         checkType(c
,o
,REDIS_STRING
)) return; 
4663     o 
= getDecodedObject(o
); 
4664     strlen 
= sdslen(o
->ptr
); 
4666     /* convert negative indexes */ 
4667     if (start 
< 0) start 
= strlen
+start
; 
4668     if (end 
< 0) end 
= strlen
+end
; 
4669     if (start 
< 0) start 
= 0; 
4670     if (end 
< 0) end 
= 0; 
4672     /* indexes sanity checks */ 
4673     if (start 
> end 
|| (size_t)start 
>= strlen
) { 
4674         /* Out of range start or start > end result in null reply */ 
4675         addReply(c
,shared
.nullbulk
); 
4679     if ((size_t)end 
>= strlen
) end 
= strlen
-1; 
4680     rangelen 
= (end
-start
)+1; 
4682     /* Return the result */ 
4683     addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
)); 
4684     range 
= sdsnewlen((char*)o
->ptr
+start
,rangelen
); 
4685     addReplySds(c
,range
); 
4686     addReply(c
,shared
.crlf
); 
4690 /* ========================= Type agnostic commands ========================= */ 
4692 static void delCommand(redisClient 
*c
) { 
4695     for (j 
= 1; j 
< c
->argc
; j
++) { 
4696         if (dbDelete(c
->db
,c
->argv
[j
])) { 
4697             touchWatchedKey(c
->db
,c
->argv
[j
]); 
4702     addReplyLongLong(c
,deleted
); 
4705 static void existsCommand(redisClient 
*c
) { 
4706     expireIfNeeded(c
->db
,c
->argv
[1]); 
4707     if (dbExists(c
->db
,c
->argv
[1])) { 
4708         addReply(c
, shared
.cone
); 
4710         addReply(c
, shared
.czero
); 
4714 static void selectCommand(redisClient 
*c
) { 
4715     int id 
= atoi(c
->argv
[1]->ptr
); 
4717     if (selectDb(c
,id
) == REDIS_ERR
) { 
4718         addReplySds(c
,sdsnew("-ERR invalid DB index\r\n")); 
4720         addReply(c
,shared
.ok
); 
4724 static void randomkeyCommand(redisClient 
*c
) { 
4727     if ((key 
= dbRandomKey(c
->db
)) == NULL
) { 
4728         addReply(c
,shared
.nullbulk
); 
4732     addReplyBulk(c
,key
); 
4736 static void keysCommand(redisClient 
*c
) { 
4739     sds pattern 
= c
->argv
[1]->ptr
; 
4740     int plen 
= sdslen(pattern
); 
4741     unsigned long numkeys 
= 0; 
4742     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
4744     di 
= dictGetIterator(c
->db
->dict
); 
4746     decrRefCount(lenobj
); 
4747     while((de 
= dictNext(di
)) != NULL
) { 
4748         sds key 
= dictGetEntryKey(de
); 
4751         if ((pattern
[0] == '*' && pattern
[1] == '\0') || 
4752             stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) { 
4753             keyobj 
= createStringObject(key
,sdslen(key
)); 
4754             if (expireIfNeeded(c
->db
,keyobj
) == 0) { 
4755                 addReplyBulk(c
,keyobj
); 
4758             decrRefCount(keyobj
); 
4761     dictReleaseIterator(di
); 
4762     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
); 
4765 static void dbsizeCommand(redisClient 
*c
) { 
4767         sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
))); 
4770 static void lastsaveCommand(redisClient 
*c
) { 
4772         sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
)); 
4775 static void typeCommand(redisClient 
*c
) { 
4779     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
4784         case REDIS_STRING
: type 
= "+string"; break; 
4785         case REDIS_LIST
: type 
= "+list"; break; 
4786         case REDIS_SET
: type 
= "+set"; break; 
4787         case REDIS_ZSET
: type 
= "+zset"; break; 
4788         case REDIS_HASH
: type 
= "+hash"; break; 
4789         default: type 
= "+unknown"; break; 
4792     addReplySds(c
,sdsnew(type
)); 
4793     addReply(c
,shared
.crlf
); 
4796 static void saveCommand(redisClient 
*c
) { 
4797     if (server
.bgsavechildpid 
!= -1) { 
4798         addReplySds(c
,sdsnew("-ERR background save in progress\r\n")); 
4801     if (rdbSave(server
.dbfilename
) == REDIS_OK
) { 
4802         addReply(c
,shared
.ok
); 
4804         addReply(c
,shared
.err
); 
4808 static void bgsaveCommand(redisClient 
*c
) { 
4809     if (server
.bgsavechildpid 
!= -1) { 
4810         addReplySds(c
,sdsnew("-ERR background save already in progress\r\n")); 
4813     if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) { 
4814         char *status 
= "+Background saving started\r\n"; 
4815         addReplySds(c
,sdsnew(status
)); 
4817         addReply(c
,shared
.err
); 
4821 static void shutdownCommand(redisClient 
*c
) { 
4822     if (prepareForShutdown() == REDIS_OK
) 
4824     addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n")); 
4827 static void renameGenericCommand(redisClient 
*c
, int nx
) { 
4830     /* To use the same key as src and dst is probably an error */ 
4831     if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) { 
4832         addReply(c
,shared
.sameobjecterr
); 
4836     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
) 
4840     deleteIfVolatile(c
->db
,c
->argv
[2]); 
4841     if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) { 
4844             addReply(c
,shared
.czero
); 
4847         dbReplace(c
->db
,c
->argv
[2],o
); 
4849     dbDelete(c
->db
,c
->argv
[1]); 
4850     touchWatchedKey(c
->db
,c
->argv
[2]); 
4852     addReply(c
,nx 
? shared
.cone 
: shared
.ok
); 
4855 static void renameCommand(redisClient 
*c
) { 
4856     renameGenericCommand(c
,0); 
4859 static void renamenxCommand(redisClient 
*c
) { 
4860     renameGenericCommand(c
,1); 
4863 static void moveCommand(redisClient 
*c
) { 
4868     /* Obtain source and target DB pointers */ 
4871     if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) { 
4872         addReply(c
,shared
.outofrangeerr
); 
4876     selectDb(c
,srcid
); /* Back to the source DB */ 
4878     /* If the user is moving using as target the same 
4879      * DB as the source DB it is probably an error. */ 
4881         addReply(c
,shared
.sameobjecterr
); 
4885     /* Check if the element exists and get a reference */ 
4886     o 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
4888         addReply(c
,shared
.czero
); 
4892     /* Try to add the element to the target DB */ 
4893     deleteIfVolatile(dst
,c
->argv
[1]); 
4894     if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) { 
4895         addReply(c
,shared
.czero
); 
4900     /* OK! key moved, free the entry in the source DB */ 
4901     dbDelete(src
,c
->argv
[1]); 
4903     addReply(c
,shared
.cone
); 
4906 /* =================================== Lists ================================ */ 
4909 /* Check the argument length to see if it requires us to convert the ziplist 
4910  * to a real list. Only check raw-encoded objects because integer encoded 
4911  * objects are never too long. */ 
4912 static void listTypeTryConversion(robj 
*subject
, robj 
*value
) { 
4913     if (subject
->encoding 
!= REDIS_ENCODING_ZIPLIST
) return; 
4914     if (value
->encoding 
== REDIS_ENCODING_RAW 
&& 
4915         sdslen(value
->ptr
) > server
.list_max_ziplist_value
) 
4916             listTypeConvert(subject
,REDIS_ENCODING_LIST
); 
4919 static void listTypePush(robj 
*subject
, robj 
*value
, int where
) { 
4920     /* Check if we need to convert the ziplist */ 
4921     listTypeTryConversion(subject
,value
); 
4922     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST 
&& 
4923         ziplistLen(subject
->ptr
) > server
.list_max_ziplist_entries
) 
4924             listTypeConvert(subject
,REDIS_ENCODING_LIST
); 
4926     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4927         int pos 
= (where 
== REDIS_HEAD
) ? ZIPLIST_HEAD 
: ZIPLIST_TAIL
; 
4928         value 
= getDecodedObject(value
); 
4929         subject
->ptr 
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),pos
); 
4930         decrRefCount(value
); 
4931     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4932         if (where 
== REDIS_HEAD
) { 
4933             listAddNodeHead(subject
->ptr
,value
); 
4935             listAddNodeTail(subject
->ptr
,value
); 
4937         incrRefCount(value
); 
4939         redisPanic("Unknown list encoding"); 
4943 static robj 
*listTypePop(robj 
*subject
, int where
) { 
4945     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4947         unsigned char *vstr
; 
4950         int pos 
= (where 
== REDIS_HEAD
) ? 0 : -1; 
4951         p 
= ziplistIndex(subject
->ptr
,pos
); 
4952         if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
4954                 value 
= createStringObject((char*)vstr
,vlen
); 
4956                 value 
= createStringObjectFromLongLong(vlong
); 
4958             /* We only need to delete an element when it exists */ 
4959             subject
->ptr 
= ziplistDelete(subject
->ptr
,&p
); 
4961     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4962         list 
*list 
= subject
->ptr
; 
4964         if (where 
== REDIS_HEAD
) { 
4965             ln 
= listFirst(list
); 
4967             ln 
= listLast(list
); 
4970             value 
= listNodeValue(ln
); 
4971             incrRefCount(value
); 
4972             listDelNode(list
,ln
); 
4975         redisPanic("Unknown list encoding"); 
4980 static unsigned long listTypeLength(robj 
*subject
) { 
4981     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
4982         return ziplistLen(subject
->ptr
); 
4983     } else if (subject
->encoding 
== REDIS_ENCODING_LIST
) { 
4984         return listLength((list
*)subject
->ptr
); 
4986         redisPanic("Unknown list encoding"); 
/* Structure to hold list iteration abstraction. */
4993     unsigned char encoding
; 
4994     unsigned char direction
; /* Iteration direction */ 
4999 /* Structure for an entry while iterating over a list. */ 
5001     listTypeIterator 
*li
; 
5002     unsigned char *zi
;  /* Entry in ziplist */ 
5003     listNode 
*ln
;       /* Entry in linked list */ 
5006 /* Initialize an iterator at the specified index. */ 
5007 static listTypeIterator 
*listTypeInitIterator(robj 
*subject
, int index
, unsigned char direction
) { 
5008     listTypeIterator 
*li 
= zmalloc(sizeof(listTypeIterator
)); 
5009     li
->subject 
= subject
; 
5010     li
->encoding 
= subject
->encoding
; 
5011     li
->direction 
= direction
; 
5012     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5013         li
->zi 
= ziplistIndex(subject
->ptr
,index
); 
5014     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5015         li
->ln 
= listIndex(subject
->ptr
,index
); 
5017         redisPanic("Unknown list encoding"); 
5022 /* Clean up the iterator. */ 
5023 static void listTypeReleaseIterator(listTypeIterator 
*li
) { 
/* Stores a pointer to the current entry in the provided entry structure
 * and advances the position of the iterator. Returns 1 when the current
 * entry is in fact an entry, 0 otherwise. */
5030 static int listTypeNext(listTypeIterator 
*li
, listTypeEntry 
*entry
) { 
5031     /* Protect from converting when iterating */ 
5032     redisAssert(li
->subject
->encoding 
== li
->encoding
); 
5035     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5037         if (entry
->zi 
!= NULL
) { 
5038             if (li
->direction 
== REDIS_TAIL
) 
5039                 li
->zi 
= ziplistNext(li
->subject
->ptr
,li
->zi
); 
5041                 li
->zi 
= ziplistPrev(li
->subject
->ptr
,li
->zi
); 
5044     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5046         if (entry
->ln 
!= NULL
) { 
5047             if (li
->direction 
== REDIS_TAIL
) 
5048                 li
->ln 
= li
->ln
->next
; 
5050                 li
->ln 
= li
->ln
->prev
; 
5054         redisPanic("Unknown list encoding"); 
5059 /* Return entry or NULL at the current position of the iterator. */ 
5060 static robj 
*listTypeGet(listTypeEntry 
*entry
) { 
5061     listTypeIterator 
*li 
= entry
->li
; 
5063     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5064         unsigned char *vstr
; 
5067         redisAssert(entry
->zi 
!= NULL
); 
5068         if (ziplistGet(entry
->zi
,&vstr
,&vlen
,&vlong
)) { 
5070                 value 
= createStringObject((char*)vstr
,vlen
); 
5072                 value 
= createStringObjectFromLongLong(vlong
); 
5075     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5076         redisAssert(entry
->ln 
!= NULL
); 
5077         value 
= listNodeValue(entry
->ln
); 
5078         incrRefCount(value
); 
5080         redisPanic("Unknown list encoding"); 
5085 /* Compare the given object with the entry at the current position. */ 
5086 static int listTypeEqual(listTypeEntry 
*entry
, robj 
*o
) { 
5087     listTypeIterator 
*li 
= entry
->li
; 
5088     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5089         redisAssert(o
->encoding 
== REDIS_ENCODING_RAW
); 
5090         return ziplistCompare(entry
->zi
,o
->ptr
,sdslen(o
->ptr
)); 
5091     } else if (li
->encoding 
== REDIS_ENCODING_LIST
) { 
5092         return equalStringObjects(o
,listNodeValue(entry
->ln
)); 
5094         redisPanic("Unknown list encoding"); 
5098 /* Delete the element pointed to. */ 
5099 static void listTypeDelete(listTypeEntry 
*entry
) { 
5100     listTypeIterator 
*li 
= entry
->li
; 
5101     if (li
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5102         unsigned char *p 
= entry
->zi
; 
5103         li
->subject
->ptr 
= ziplistDelete(li
->subject
->ptr
,&p
); 
5105         /* Update position of the iterator depending on the direction */ 
5106         if (li
->direction 
== REDIS_TAIL
) 
5109             li
->zi 
= ziplistPrev(li
->subject
->ptr
,p
); 
5110     } else if (entry
->li
->encoding 
== REDIS_ENCODING_LIST
) { 
5112         if (li
->direction 
== REDIS_TAIL
) 
5113             next 
= entry
->ln
->next
; 
5115             next 
= entry
->ln
->prev
; 
5116         listDelNode(li
->subject
->ptr
,entry
->ln
); 
5119         redisPanic("Unknown list encoding"); 
5123 static void listTypeConvert(robj 
*subject
, int enc
) { 
5124     listTypeIterator 
*li
; 
5125     listTypeEntry entry
; 
5126     redisAssert(subject
->type 
== REDIS_LIST
); 
5128     if (enc 
== REDIS_ENCODING_LIST
) { 
5129         list 
*l 
= listCreate(); 
5130         listSetFreeMethod(l
,decrRefCount
); 
5132         /* listTypeGet returns a robj with incremented refcount */ 
5133         li 
= listTypeInitIterator(subject
,0,REDIS_TAIL
); 
5134         while (listTypeNext(li
,&entry
)) listAddNodeTail(l
,listTypeGet(&entry
)); 
5135         listTypeReleaseIterator(li
); 
5137         subject
->encoding 
= REDIS_ENCODING_LIST
; 
5138         zfree(subject
->ptr
); 
5141         redisPanic("Unsupported list conversion"); 
5145 static void pushGenericCommand(redisClient 
*c
, int where
) { 
5146     robj 
*lobj 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5148         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
5149             addReply(c
,shared
.cone
); 
5152         lobj 
= createZiplistObject(); 
5153         dbAdd(c
->db
,c
->argv
[1],lobj
); 
5155         if (lobj
->type 
!= REDIS_LIST
) { 
5156             addReply(c
,shared
.wrongtypeerr
); 
5159         if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) { 
5160             addReply(c
,shared
.cone
); 
5164     listTypePush(lobj
,c
->argv
[2],where
); 
5165     addReplyLongLong(c
,listTypeLength(lobj
)); 
5169 static void lpushCommand(redisClient 
*c
) { 
5170     pushGenericCommand(c
,REDIS_HEAD
); 
5173 static void rpushCommand(redisClient 
*c
) { 
5174     pushGenericCommand(c
,REDIS_TAIL
); 
5177 static void llenCommand(redisClient 
*c
) { 
5178     robj 
*o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
); 
5179     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5180     addReplyUlong(c
,listTypeLength(o
)); 
5183 static void lindexCommand(redisClient 
*c
) { 
5184     robj 
*o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
); 
5185     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5186     int index 
= atoi(c
->argv
[2]->ptr
); 
5189     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5191         unsigned char *vstr
; 
5194         p 
= ziplistIndex(o
->ptr
,index
); 
5195         if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
5197                 value 
= createStringObject((char*)vstr
,vlen
); 
5199                 value 
= createStringObjectFromLongLong(vlong
); 
5201             addReplyBulk(c
,value
); 
5202             decrRefCount(value
); 
5204             addReply(c
,shared
.nullbulk
); 
5206     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5207         listNode 
*ln 
= listIndex(o
->ptr
,index
); 
5209             value 
= listNodeValue(ln
); 
5210             addReplyBulk(c
,value
); 
5212             addReply(c
,shared
.nullbulk
); 
5215         redisPanic("Unknown list encoding"); 
5219 static void lsetCommand(redisClient 
*c
) { 
5220     robj 
*o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
); 
5221     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5222     int index 
= atoi(c
->argv
[2]->ptr
); 
5223     robj 
*value 
= c
->argv
[3]; 
5225     listTypeTryConversion(o
,value
); 
5226     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5227         unsigned char *p
, *zl 
= o
->ptr
; 
5228         p 
= ziplistIndex(zl
,index
); 
5230             addReply(c
,shared
.outofrangeerr
); 
5232             o
->ptr 
= ziplistDelete(o
->ptr
,&p
); 
5233             value 
= getDecodedObject(value
); 
5234             o
->ptr 
= ziplistInsert(o
->ptr
,p
,value
->ptr
,sdslen(value
->ptr
)); 
5235             decrRefCount(value
); 
5236             addReply(c
,shared
.ok
); 
5239     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5240         listNode 
*ln 
= listIndex(o
->ptr
,index
); 
5242             addReply(c
,shared
.outofrangeerr
); 
5244             decrRefCount((robj
*)listNodeValue(ln
)); 
5245             listNodeValue(ln
) = value
; 
5246             incrRefCount(value
); 
5247             addReply(c
,shared
.ok
); 
5251         redisPanic("Unknown list encoding"); 
5255 static void popGenericCommand(redisClient 
*c
, int where
) { 
5256     robj 
*o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
); 
5257     if (o 
== NULL 
|| checkType(c
,o
,REDIS_LIST
)) return; 
5259     robj 
*value 
= listTypePop(o
,where
); 
5260     if (value 
== NULL
) { 
5261         addReply(c
,shared
.nullbulk
); 
5263         addReplyBulk(c
,value
); 
5264         decrRefCount(value
); 
5265         if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5270 static void lpopCommand(redisClient 
*c
) { 
5271     popGenericCommand(c
,REDIS_HEAD
); 
5274 static void rpopCommand(redisClient 
*c
) { 
5275     popGenericCommand(c
,REDIS_TAIL
); 
5278 static void lrangeCommand(redisClient 
*c
) { 
5280     int start 
= atoi(c
->argv
[2]->ptr
); 
5281     int end 
= atoi(c
->argv
[3]->ptr
); 
5284     listTypeEntry entry
; 
5286     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
5287          || checkType(c
,o
,REDIS_LIST
)) return; 
5288     llen 
= listTypeLength(o
); 
5290     /* convert negative indexes */ 
5291     if (start 
< 0) start 
= llen
+start
; 
5292     if (end 
< 0) end 
= llen
+end
; 
5293     if (start 
< 0) start 
= 0; 
5294     if (end 
< 0) end 
= 0; 
5296     /* indexes sanity checks */ 
5297     if (start 
> end 
|| start 
>= llen
) { 
5298         /* Out of range start or start > end result in empty list */ 
5299         addReply(c
,shared
.emptymultibulk
); 
5302     if (end 
>= llen
) end 
= llen
-1; 
5303     rangelen 
= (end
-start
)+1; 
5305     /* Return the result in form of a multi-bulk reply */ 
5306     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
)); 
5307     listTypeIterator 
*li 
= listTypeInitIterator(o
,start
,REDIS_TAIL
); 
5308     for (j 
= 0; j 
< rangelen
; j
++) { 
5309         redisAssert(listTypeNext(li
,&entry
)); 
5310         value 
= listTypeGet(&entry
); 
5311         addReplyBulk(c
,value
); 
5312         decrRefCount(value
); 
5314     listTypeReleaseIterator(li
); 
5317 static void ltrimCommand(redisClient 
*c
) { 
5319     int start 
= atoi(c
->argv
[2]->ptr
); 
5320     int end 
= atoi(c
->argv
[3]->ptr
); 
5322     int j
, ltrim
, rtrim
; 
5326     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL 
|| 
5327         checkType(c
,o
,REDIS_LIST
)) return; 
5328     llen 
= listTypeLength(o
); 
5330     /* convert negative indexes */ 
5331     if (start 
< 0) start 
= llen
+start
; 
5332     if (end 
< 0) end 
= llen
+end
; 
5333     if (start 
< 0) start 
= 0; 
5334     if (end 
< 0) end 
= 0; 
5336     /* indexes sanity checks */ 
5337     if (start 
> end 
|| start 
>= llen
) { 
5338         /* Out of range start or start > end result in empty list */ 
5342         if (end 
>= llen
) end 
= llen
-1; 
5347     /* Remove list elements to perform the trim */ 
5348     if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
5349         o
->ptr 
= ziplistDeleteRange(o
->ptr
,0,ltrim
); 
5350         o
->ptr 
= ziplistDeleteRange(o
->ptr
,-rtrim
,rtrim
); 
5351     } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
5353         for (j 
= 0; j 
< ltrim
; j
++) { 
5354             ln 
= listFirst(list
); 
5355             listDelNode(list
,ln
); 
5357         for (j 
= 0; j 
< rtrim
; j
++) { 
5358             ln 
= listLast(list
); 
5359             listDelNode(list
,ln
); 
5362         redisPanic("Unknown list encoding"); 
5364     if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5366     addReply(c
,shared
.ok
); 
5369 static void lremCommand(redisClient 
*c
) { 
5370     robj 
*subject
, *obj 
= c
->argv
[3]; 
5371     int toremove 
= atoi(c
->argv
[2]->ptr
); 
5373     listTypeEntry entry
; 
5375     subject 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
); 
5376     if (subject 
== NULL 
|| checkType(c
,subject
,REDIS_LIST
)) return; 
5378     /* Make sure obj is raw when we're dealing with a ziplist */ 
5379     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) 
5380         obj 
= getDecodedObject(obj
); 
5382     listTypeIterator 
*li
; 
5384         toremove 
= -toremove
; 
5385         li 
= listTypeInitIterator(subject
,-1,REDIS_HEAD
); 
5387         li 
= listTypeInitIterator(subject
,0,REDIS_TAIL
); 
5390     while (listTypeNext(li
,&entry
)) { 
5391         if (listTypeEqual(&entry
,obj
)) { 
5392             listTypeDelete(&entry
); 
5395             if (toremove 
&& removed 
== toremove
) break; 
5398     listTypeReleaseIterator(li
); 
5400     /* Clean up raw encoded object */ 
5401     if (subject
->encoding 
== REDIS_ENCODING_ZIPLIST
) 
5404     if (listTypeLength(subject
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5405     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
)); 
5408 /* This is the semantic of this command: 
5409  *  RPOPLPUSH srclist dstlist: 
5410  *   IF LLEN(srclist) > 0 
5411  *     element = RPOP srclist 
5412  *     LPUSH dstlist element 
5419  * The idea is to be able to get an element from a list in a reliable way 
5420  * since the element is not just returned but pushed against another list 
5421  * as well. This command was originally proposed by Ezra Zygmuntowicz. 
5423 static void rpoplpushcommand(redisClient 
*c
) { 
5425     if ((sobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5426         checkType(c
,sobj
,REDIS_LIST
)) return; 
5428     if (listTypeLength(sobj
) == 0) { 
5429         addReply(c
,shared
.nullbulk
); 
5431         robj 
*dobj 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
5432         if (dobj 
&& checkType(c
,dobj
,REDIS_LIST
)) return; 
5433         value 
= listTypePop(sobj
,REDIS_TAIL
); 
5435         /* Add the element to the target list (unless it's directly 
5436          * passed to some BLPOP-ing client */ 
5437         if (!handleClientsWaitingListPush(c
,c
->argv
[2],value
)) { 
5438             /* Create the list if the key does not exist */ 
5440                 dobj 
= createZiplistObject(); 
5441                 dbAdd(c
->db
,c
->argv
[2],dobj
); 
5443             listTypePush(dobj
,value
,REDIS_HEAD
); 
5446         /* Send the element to the client as reply as well */ 
5447         addReplyBulk(c
,value
); 
5449         /* listTypePop returns an object with its refcount incremented */ 
5450         decrRefCount(value
); 
5452         /* Delete the source list when it is empty */ 
5453         if (listTypeLength(sobj
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5458 /* ==================================== Sets ================================ */ 
5460 static void saddCommand(redisClient 
*c
) { 
5463     set 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5465         set 
= createSetObject(); 
5466         dbAdd(c
->db
,c
->argv
[1],set
); 
5468         if (set
->type 
!= REDIS_SET
) { 
5469             addReply(c
,shared
.wrongtypeerr
); 
5473     if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) { 
5474         incrRefCount(c
->argv
[2]); 
5476         addReply(c
,shared
.cone
); 
5478         addReply(c
,shared
.czero
); 
5482 static void sremCommand(redisClient 
*c
) { 
5485     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5486         checkType(c
,set
,REDIS_SET
)) return; 
5488     if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) { 
5490         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5491         if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5492         addReply(c
,shared
.cone
); 
5494         addReply(c
,shared
.czero
); 
5498 static void smoveCommand(redisClient 
*c
) { 
5499     robj 
*srcset
, *dstset
; 
5501     srcset 
= lookupKeyWrite(c
->db
,c
->argv
[1]); 
5502     dstset 
= lookupKeyWrite(c
->db
,c
->argv
[2]); 
5504     /* If the source key does not exist return 0, if it's of the wrong type 
5506     if (srcset 
== NULL 
|| srcset
->type 
!= REDIS_SET
) { 
5507         addReply(c
, srcset 
? shared
.wrongtypeerr 
: shared
.czero
); 
5510     /* Error if the destination key is not a set as well */ 
5511     if (dstset 
&& dstset
->type 
!= REDIS_SET
) { 
5512         addReply(c
,shared
.wrongtypeerr
); 
5515     /* Remove the element from the source set */ 
5516     if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) { 
5517         /* Key not found in the src set! return zero */ 
5518         addReply(c
,shared
.czero
); 
5521     if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset 
!= dstset
) 
5522         dbDelete(c
->db
,c
->argv
[1]); 
5524     /* Add the element to the destination set */ 
5526         dstset 
= createSetObject(); 
5527         dbAdd(c
->db
,c
->argv
[2],dstset
); 
5529     if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
) 
5530         incrRefCount(c
->argv
[3]); 
5531     addReply(c
,shared
.cone
); 
5534 static void sismemberCommand(redisClient 
*c
) { 
5537     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5538         checkType(c
,set
,REDIS_SET
)) return; 
5540     if (dictFind(set
->ptr
,c
->argv
[2])) 
5541         addReply(c
,shared
.cone
); 
5543         addReply(c
,shared
.czero
); 
5546 static void scardCommand(redisClient 
*c
) { 
5550     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
5551         checkType(c
,o
,REDIS_SET
)) return; 
5554     addReplyUlong(c
,dictSize(s
)); 
5557 static void spopCommand(redisClient 
*c
) { 
5561     if ((set 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5562         checkType(c
,set
,REDIS_SET
)) return; 
5564     de 
= dictGetRandomKey(set
->ptr
); 
5566         addReply(c
,shared
.nullbulk
); 
5568         robj 
*ele 
= dictGetEntryKey(de
); 
5570         addReplyBulk(c
,ele
); 
5571         dictDelete(set
->ptr
,ele
); 
5572         if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
); 
5573         if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
5578 static void srandmemberCommand(redisClient 
*c
) { 
5582     if ((set 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
5583         checkType(c
,set
,REDIS_SET
)) return; 
5585     de 
= dictGetRandomKey(set
->ptr
); 
5587         addReply(c
,shared
.nullbulk
); 
5589         robj 
*ele 
= dictGetEntryKey(de
); 
5591         addReplyBulk(c
,ele
); 
5595 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) { 
5596     dict 
**d1 
= (void*) s1
, **d2 
= (void*) s2
; 
5598     return dictSize(*d1
)-dictSize(*d2
); 
5601 static void sinterGenericCommand(redisClient 
*c
, robj 
**setskeys
, unsigned long setsnum
, robj 
*dstkey
) { 
5602     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5605     robj 
*lenobj 
= NULL
, *dstset 
= NULL
; 
5606     unsigned long j
, cardinality 
= 0; 
5608     for (j 
= 0; j 
< setsnum
; j
++) { 
5612                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5613                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5617                 if (dbDelete(c
->db
,dstkey
)) 
5619                 addReply(c
,shared
.czero
); 
5621                 addReply(c
,shared
.emptymultibulk
); 
5625         if (setobj
->type 
!= REDIS_SET
) { 
5627             addReply(c
,shared
.wrongtypeerr
); 
5630         dv
[j
] = setobj
->ptr
; 
5632     /* Sort sets from the smallest to largest, this will improve our 
5633      * algorithm's performance */
5634     qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
); 
5636     /* The first thing we should output is the total number of elements... 
5637      * since this is a multi-bulk write, but at this stage we don't know 
5638      * the intersection set size, so we use a trick, append an empty object 
5639      * to the output list and save the pointer to later modify it with the 
5642         lenobj 
= createObject(REDIS_STRING
,NULL
); 
5644         decrRefCount(lenobj
); 
5646         /* If we have a target key where to store the resulting set 
5647          * create this key with an empty set inside */ 
5648         dstset 
= createSetObject(); 
5651     /* Iterate all the elements of the first (smallest) set, and test 
5652      * the element against all the other sets, if at least one set does 
5653      * not include the element it is discarded */ 
5654     di 
= dictGetIterator(dv
[0]); 
5656     while((de 
= dictNext(di
)) != NULL
) { 
5659         for (j 
= 1; j 
< setsnum
; j
++) 
5660             if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break; 
5662             continue; /* at least one set does not contain the member */ 
5663         ele 
= dictGetEntryKey(de
); 
5665             addReplyBulk(c
,ele
); 
5668             dictAdd(dstset
->ptr
,ele
,NULL
); 
5672     dictReleaseIterator(di
); 
5675         /* Store the resulting set into the target, if the intersection 
5676          * is not an empty set. */ 
5677         dbDelete(c
->db
,dstkey
); 
5678         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5679             dbAdd(c
->db
,dstkey
,dstset
); 
5680             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5682             decrRefCount(dstset
); 
5683             addReply(c
,shared
.czero
); 
5687         lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
); 
5692 static void sinterCommand(redisClient 
*c
) { 
5693     sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
); 
5696 static void sinterstoreCommand(redisClient 
*c
) { 
5697     sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]); 
5700 #define REDIS_OP_UNION 0 
5701 #define REDIS_OP_DIFF 1 
5702 #define REDIS_OP_INTER 2 
5704 static void sunionDiffGenericCommand(redisClient 
*c
, robj 
**setskeys
, int setsnum
, robj 
*dstkey
, int op
) { 
5705     dict 
**dv 
= zmalloc(sizeof(dict
*)*setsnum
); 
5708     robj 
*dstset 
= NULL
; 
5709     int j
, cardinality 
= 0; 
5711     for (j 
= 0; j 
< setsnum
; j
++) { 
5715                     lookupKeyWrite(c
->db
,setskeys
[j
]) : 
5716                     lookupKeyRead(c
->db
,setskeys
[j
]); 
5721         if (setobj
->type 
!= REDIS_SET
) { 
5723             addReply(c
,shared
.wrongtypeerr
); 
5726         dv
[j
] = setobj
->ptr
; 
5729     /* We need a temp set object to store our union. If the dstkey 
5730      * is not NULL (that is, we are inside an SUNIONSTORE operation) then 
5731      * this set object will be the resulting object to set into the target key*/ 
5732     dstset 
= createSetObject(); 
5734     /* Iterate all the elements of all the sets, add every element a single 
5735      * time to the result set */ 
5736     for (j 
= 0; j 
< setsnum
; j
++) { 
5737         if (op 
== REDIS_OP_DIFF 
&& j 
== 0 && !dv
[j
]) break; /* result set is empty */ 
5738         if (!dv
[j
]) continue; /* non existing keys are like empty sets */ 
5740         di 
= dictGetIterator(dv
[j
]); 
5742         while((de 
= dictNext(di
)) != NULL
) { 
5745             /* dictAdd will not add the same element multiple times */ 
5746             ele 
= dictGetEntryKey(de
); 
5747             if (op 
== REDIS_OP_UNION 
|| j 
== 0) { 
5748                 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) { 
5752             } else if (op 
== REDIS_OP_DIFF
) { 
5753                 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) { 
5758         dictReleaseIterator(di
); 
5760         /* result set is empty? Exit asap. */ 
5761         if (op 
== REDIS_OP_DIFF 
&& cardinality 
== 0) break; 
5764     /* Output the content of the resulting set, if not in STORE mode */ 
5766         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
)); 
5767         di 
= dictGetIterator(dstset
->ptr
); 
5768         while((de 
= dictNext(di
)) != NULL
) { 
5771             ele 
= dictGetEntryKey(de
); 
5772             addReplyBulk(c
,ele
); 
5774         dictReleaseIterator(di
); 
5775         decrRefCount(dstset
); 
5777         /* If we have a target key where to store the resulting set 
5778          * create this key with the result set inside */ 
5779         dbDelete(c
->db
,dstkey
); 
5780         if (dictSize((dict
*)dstset
->ptr
) > 0) { 
5781             dbAdd(c
->db
,dstkey
,dstset
); 
5782             addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
)); 
5784             decrRefCount(dstset
); 
5785             addReply(c
,shared
.czero
); 
5792 static void sunionCommand(redisClient 
*c
) { 
5793     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
); 
5796 static void sunionstoreCommand(redisClient 
*c
) { 
5797     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
); 
5800 static void sdiffCommand(redisClient 
*c
) { 
5801     sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
); 
5804 static void sdiffstoreCommand(redisClient 
*c
) { 
5805     sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
); 
5808 /* ==================================== ZSets =============================== */ 
5810 /* ZSETs are ordered sets using two data structures to hold the same elements 
5811  * in order to get O(log(N)) INSERT and REMOVE operations into a sorted 
5814  * The elements are added to an hash table mapping Redis objects to scores. 
5815  * At the same time the elements are added to a skip list mapping scores 
5816  * to Redis objects (so objects are sorted by scores in this "view"). */ 
5818 /* This skiplist implementation is almost a C translation of the original 
5819  * algorithm described by William Pugh in "Skip Lists: A Probabilistic 
5820  * Alternative to Balanced Trees", modified in three ways: 
5821  * a) this implementation allows for repeated values. 
5822  * b) the comparison is not just by key (our 'score') but by satellite data. 
5823  * c) there is a back pointer, so it's a doubly linked list with the back 
5824  * pointers being only at "level 1". This allows to traverse the list 
5825  * from tail to head, useful for ZREVRANGE. */ 
5827 static zskiplistNode 
*zslCreateNode(int level
, double score
, robj 
*obj
) { 
5828     zskiplistNode 
*zn 
= zmalloc(sizeof(*zn
)); 
5830     zn
->forward 
= zmalloc(sizeof(zskiplistNode
*) * level
); 
5832         zn
->span 
= zmalloc(sizeof(unsigned int) * (level 
- 1)); 
5840 static zskiplist 
*zslCreate(void) { 
5844     zsl 
= zmalloc(sizeof(*zsl
)); 
5847     zsl
->header 
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
); 
5848     for (j 
= 0; j 
< ZSKIPLIST_MAXLEVEL
; j
++) { 
5849         zsl
->header
->forward
[j
] = NULL
; 
5851         /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */ 
5852         if (j 
< ZSKIPLIST_MAXLEVEL
-1) 
5853             zsl
->header
->span
[j
] = 0; 
5855     zsl
->header
->backward 
= NULL
; 
5860 static void zslFreeNode(zskiplistNode 
*node
) { 
5861     decrRefCount(node
->obj
); 
5862     zfree(node
->forward
); 
5867 static void zslFree(zskiplist 
*zsl
) { 
5868     zskiplistNode 
*node 
= zsl
->header
->forward
[0], *next
; 
5870     zfree(zsl
->header
->forward
); 
5871     zfree(zsl
->header
->span
); 
5874         next 
= node
->forward
[0]; 
5881 static int zslRandomLevel(void) { 
5883     while ((random()&0xFFFF) < (ZSKIPLIST_P 
* 0xFFFF)) 
5885     return (level
<ZSKIPLIST_MAXLEVEL
) ? level 
: ZSKIPLIST_MAXLEVEL
; 
5888 static void zslInsert(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5889     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5890     unsigned int rank
[ZSKIPLIST_MAXLEVEL
]; 
5894     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5895         /* store rank that is crossed to reach the insert position */ 
5896         rank
[i
] = i 
== (zsl
->level
-1) ? 0 : rank
[i
+1]; 
5898         while (x
->forward
[i
] && 
5899             (x
->forward
[i
]->score 
< score 
|| 
5900                 (x
->forward
[i
]->score 
== score 
&& 
5901                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) { 
5902             rank
[i
] += i 
> 0 ? x
->span
[i
-1] : 1; 
5907     /* we assume the key is not already inside, since we allow duplicated 
5908      * scores, and the re-insertion of score and redis object should never 
5909      * happen since the caller of zslInsert() should test in the hash table
5910      * if the element is already inside or not. */ 
5911     level 
= zslRandomLevel(); 
5912     if (level 
> zsl
->level
) { 
5913         for (i 
= zsl
->level
; i 
< level
; i
++) { 
5915             update
[i
] = zsl
->header
; 
5916             update
[i
]->span
[i
-1] = zsl
->length
; 
5920     x 
= zslCreateNode(level
,score
,obj
); 
5921     for (i 
= 0; i 
< level
; i
++) { 
5922         x
->forward
[i
] = update
[i
]->forward
[i
]; 
5923         update
[i
]->forward
[i
] = x
; 
5925         /* update span covered by update[i] as x is inserted here */ 
5927             x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]); 
5928             update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1; 
5932     /* increment span for untouched levels */ 
5933     for (i 
= level
; i 
< zsl
->level
; i
++) { 
5934         update
[i
]->span
[i
-1]++; 
5937     x
->backward 
= (update
[0] == zsl
->header
) ? NULL 
: update
[0]; 
5939         x
->forward
[0]->backward 
= x
; 
5945 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ 
5946 void zslDeleteNode(zskiplist 
*zsl
, zskiplistNode 
*x
, zskiplistNode 
**update
) { 
5948     for (i 
= 0; i 
< zsl
->level
; i
++) { 
5949         if (update
[i
]->forward
[i
] == x
) { 
5951                 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1; 
5953             update
[i
]->forward
[i
] = x
->forward
[i
]; 
5955             /* invariant: i > 0, because update[0]->forward[0] 
5956              * is always equal to x */ 
5957             update
[i
]->span
[i
-1] -= 1; 
5960     if (x
->forward
[0]) { 
5961         x
->forward
[0]->backward 
= x
->backward
; 
5963         zsl
->tail 
= x
->backward
; 
5965     while(zsl
->level 
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
) 
5970 /* Delete an element with matching score/object from the skiplist. */ 
5971 static int zslDelete(zskiplist 
*zsl
, double score
, robj 
*obj
) { 
5972     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
5976     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
5977         while (x
->forward
[i
] && 
5978             (x
->forward
[i
]->score 
< score 
|| 
5979                 (x
->forward
[i
]->score 
== score 
&& 
5980                 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) 
5984     /* We may have multiple elements with the same score, what we need 
5985      * is to find the element with both the right score and object. */ 
5987     if (x 
&& score 
== x
->score 
&& equalStringObjects(x
->obj
,obj
)) { 
5988         zslDeleteNode(zsl
, x
, update
); 
5992         return 0; /* not found */ 
5994     return 0; /* not found */ 
5997 /* Delete all the elements with score between min and max from the skiplist. 
5998  * Min and max are inclusive, so any element with min <= score <= max is deleted.
5999  * Note that this function takes the reference to the hash table view of the 
6000  * sorted set, in order to remove the elements from the hash table too. */ 
6001 static unsigned long zslDeleteRangeByScore(zskiplist 
*zsl
, double min
, double max
, dict 
*dict
) { 
6002     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
6003     unsigned long removed 
= 0; 
6007     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6008         while (x
->forward
[i
] && x
->forward
[i
]->score 
< min
) 
6012     /* Every node skipped above has a score lower than min; from here on
6013      * delete nodes one at a time while their score is still <= max. */
6015     while (x 
&& x
->score 
<= max
) { 
6016         zskiplistNode 
*next 
= x
->forward
[0]; 
6017         zslDeleteNode(zsl
, x
, update
); 
6018         dictDelete(dict
,x
->obj
); 
6023     return removed
; /* not found */ 
6026 /* Delete all the elements with rank between start and end from the skiplist. 
6027  * Start and end are inclusive. Note that start and end need to be 1-based */ 
6028 static unsigned long zslDeleteRangeByRank(zskiplist 
*zsl
, unsigned int start
, unsigned int end
, dict 
*dict
) { 
6029     zskiplistNode 
*update
[ZSKIPLIST_MAXLEVEL
], *x
; 
6030     unsigned long traversed 
= 0, removed 
= 0; 
6034     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6035         while (x
->forward
[i
] && (traversed 
+ (i 
> 0 ? x
->span
[i
-1] : 1)) < start
) { 
6036             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6044     while (x 
&& traversed 
<= end
) { 
6045         zskiplistNode 
*next 
= x
->forward
[0]; 
6046         zslDeleteNode(zsl
, x
, update
); 
6047         dictDelete(dict
,x
->obj
); 
6056 /* Find the first node having a score equal or greater than the specified one. 
6057  * Returns NULL if there is no match. */ 
6058 static zskiplistNode 
*zslFirstWithScore(zskiplist 
*zsl
, double score
) { 
6063     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6064         while (x
->forward
[i
] && x
->forward
[i
]->score 
< score
) 
6067     /* Every node passed over above scores lower than 'score', so the
6068      * level-0 successor of x is the first match (NULL when there is none). */
6069     return x
->forward
[0]; 
6072 /* Find the rank for an element by both score and key. 
6073  * Returns 0 when the element cannot be found, rank otherwise. 
6074  * Note that the rank is 1-based due to the span of zsl->header to the 
6076 static unsigned long zslistTypeGetRank(zskiplist 
*zsl
, double score
, robj 
*o
) { 
6078     unsigned long rank 
= 0; 
6082     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6083         while (x
->forward
[i
] && 
6084             (x
->forward
[i
]->score 
< score 
|| 
6085                 (x
->forward
[i
]->score 
== score 
&& 
6086                 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) { 
6087             rank 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6091         /* x might be equal to zsl->header, so test if obj is non-NULL */ 
6092         if (x
->obj 
&& equalStringObjects(x
->obj
,o
)) { 
6099 /* Finds an element by its rank. The rank argument needs to be 1-based. */ 
6100 zskiplistNode
* zslistTypeGetElementByRank(zskiplist 
*zsl
, unsigned long rank
) { 
6102     unsigned long traversed 
= 0; 
6106     for (i 
= zsl
->level
-1; i 
>= 0; i
--) { 
6107         while (x
->forward
[i
] && (traversed 
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
) 
6109             traversed 
+= i 
> 0 ? x
->span
[i
-1] : 1; 
6112         if (traversed 
== rank
) { 
6119 /* The actual Z-commands implementations */ 
6121 /* This generic command implements both ZADD and ZINCRBY. 
6122  * scoreval is the score if the operation is a ZADD (doincrement == 0) or 
6123  * the increment if the operation is a ZINCRBY (doincrement == 1). */ 
6124 static void zaddGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*ele
, double scoreval
, int doincrement
) { 
6129     if (isnan(scoreval
)) { 
6130         addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n")); 
6134     zsetobj 
= lookupKeyWrite(c
->db
,key
); 
6135     if (zsetobj 
== NULL
) { 
6136         zsetobj 
= createZsetObject(); 
6137         dbAdd(c
->db
,key
,zsetobj
); 
6139         if (zsetobj
->type 
!= REDIS_ZSET
) { 
6140             addReply(c
,shared
.wrongtypeerr
); 
6146     /* Ok now since we implement both ZADD and ZINCRBY here the code 
6147      * needs to handle the two different conditions. It's all about setting 
6148      * '*score', that is, the new score to set, to the right value. */ 
6149     score 
= zmalloc(sizeof(double)); 
6153         /* Read the old score. If the element was not present starts from 0 */ 
6154         de 
= dictFind(zs
->dict
,ele
); 
6156             double *oldscore 
= dictGetEntryVal(de
); 
6157             *score 
= *oldscore 
+ scoreval
; 
6161         if (isnan(*score
)) { 
6163                 sdsnew("-ERR resulting score is Not A Number (nan)\r\n")); 
6165             /* Note that we don't need to check if the zset may be empty and 
6166              * should be removed here, as we can only obtain Nan as score if 
6167              * there was already an element in the sorted set. */ 
6174     /* What follows is a simple remove and re-insert operation that is common 
6175      * to both ZADD and ZINCRBY... */ 
6176     if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) { 
6177         /* case 1: New element */ 
6178         incrRefCount(ele
); /* added to hash */ 
6179         zslInsert(zs
->zsl
,*score
,ele
); 
6180         incrRefCount(ele
); /* added to skiplist */ 
6183             addReplyDouble(c
,*score
); 
6185             addReply(c
,shared
.cone
); 
6190         /* case 2: Score update operation */ 
6191         de 
= dictFind(zs
->dict
,ele
); 
6192         redisAssert(de 
!= NULL
); 
6193         oldscore 
= dictGetEntryVal(de
); 
6194         if (*score 
!= *oldscore
) { 
6197             /* Remove and insert the element in the skip list with new score */ 
6198             deleted 
= zslDelete(zs
->zsl
,*oldscore
,ele
); 
6199             redisAssert(deleted 
!= 0); 
6200             zslInsert(zs
->zsl
,*score
,ele
); 
6202             /* Update the score in the hash table */ 
6203             dictReplace(zs
->dict
,ele
,score
); 
6209             addReplyDouble(c
,*score
); 
6211             addReply(c
,shared
.czero
); 
6215 static void zaddCommand(redisClient 
*c
) { 
6218     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
6219     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0); 
6222 static void zincrbyCommand(redisClient 
*c
) { 
6225     if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return; 
6226     zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1); 
6229 static void zremCommand(redisClient 
*c
) { 
6236     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6237         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6240     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6242         addReply(c
,shared
.czero
); 
6245     /* Delete from the skiplist */ 
6246     oldscore 
= dictGetEntryVal(de
); 
6247     deleted 
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]); 
6248     redisAssert(deleted 
!= 0); 
6250     /* Delete from the hash table */ 
6251     dictDelete(zs
->dict
,c
->argv
[2]); 
6252     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6253     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6255     addReply(c
,shared
.cone
); 
6258 static void zremrangebyscoreCommand(redisClient 
*c
) { 
6265     if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) || 
6266         (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return; 
6268     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6269         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6272     deleted 
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
); 
6273     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6274     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6275     server
.dirty 
+= deleted
; 
6276     addReplyLongLong(c
,deleted
); 
6279 static void zremrangebyrankCommand(redisClient 
*c
) { 
6287     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
6288         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
6290     if ((zsetobj 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6291         checkType(c
,zsetobj
,REDIS_ZSET
)) return; 
6293     llen 
= zs
->zsl
->length
; 
6295     /* convert negative indexes */ 
6296     if (start 
< 0) start 
= llen
+start
; 
6297     if (end 
< 0) end 
= llen
+end
; 
6298     if (start 
< 0) start 
= 0; 
6299     if (end 
< 0) end 
= 0; 
6301     /* indexes sanity checks */ 
6302     if (start 
> end 
|| start 
>= llen
) { 
6303         addReply(c
,shared
.czero
); 
6306     if (end 
>= llen
) end 
= llen
-1; 
6308     /* increment start and end because zsl*Rank functions 
6309      * use 1-based rank */ 
6310     deleted 
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
); 
6311     if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
); 
6312     if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
6313     server
.dirty 
+= deleted
; 
6314     addReplyLongLong(c
, deleted
); 
6322 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) { 
6323     zsetopsrc 
*d1 
= (void*) s1
, *d2 
= (void*) s2
; 
6324     unsigned long size1
, size2
; 
6325     size1 
= d1
->dict 
? dictSize(d1
->dict
) : 0; 
6326     size2 
= d2
->dict 
? dictSize(d2
->dict
) : 0; 
6327     return size1 
- size2
; 
6330 #define REDIS_AGGR_SUM 1 
6331 #define REDIS_AGGR_MIN 2 
6332 #define REDIS_AGGR_MAX 3 
6333 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e)) 
6335 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) { 
6336     if (aggregate 
== REDIS_AGGR_SUM
) { 
6337         *target 
= *target 
+ val
; 
6338     } else if (aggregate 
== REDIS_AGGR_MIN
) { 
6339         *target 
= val 
< *target 
? val 
: *target
; 
6340     } else if (aggregate 
== REDIS_AGGR_MAX
) { 
6341         *target 
= val 
> *target 
? val 
: *target
; 
6344         redisPanic("Unknown ZUNION/INTER aggregate type"); 
6348 static void zunionInterGenericCommand(redisClient 
*c
, robj 
*dstkey
, int op
) { 
6350     int aggregate 
= REDIS_AGGR_SUM
; 
6357     /* expect setnum input keys to be given */ 
6358     setnum 
= atoi(c
->argv
[2]->ptr
); 
6360         addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n")); 
6364     /* test if the expected number of keys would overflow */ 
6365     if (3+setnum 
> c
->argc
) { 
6366         addReply(c
,shared
.syntaxerr
); 
6370     /* read keys to be used for input */ 
6371     src 
= zmalloc(sizeof(zsetopsrc
) * setnum
); 
6372     for (i 
= 0, j 
= 3; i 
< setnum
; i
++, j
++) { 
6373         robj 
*obj 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
6377             if (obj
->type 
== REDIS_ZSET
) { 
6378                 src
[i
].dict 
= ((zset
*)obj
->ptr
)->dict
; 
6379             } else if (obj
->type 
== REDIS_SET
) { 
6380                 src
[i
].dict 
= (obj
->ptr
); 
6383                 addReply(c
,shared
.wrongtypeerr
); 
6388         /* default all weights to 1 */ 
6389         src
[i
].weight 
= 1.0; 
6392     /* parse optional extra arguments */ 
6394         int remaining 
= c
->argc 
- j
; 
6397             if (remaining 
>= (setnum 
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) { 
6399                 for (i 
= 0; i 
< setnum
; i
++, j
++, remaining
--) { 
6400                     if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
) 
6403             } else if (remaining 
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) { 
6405                 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) { 
6406                     aggregate 
= REDIS_AGGR_SUM
; 
6407                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) { 
6408                     aggregate 
= REDIS_AGGR_MIN
; 
6409                 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) { 
6410                     aggregate 
= REDIS_AGGR_MAX
; 
6413                     addReply(c
,shared
.syntaxerr
); 
6419                 addReply(c
,shared
.syntaxerr
); 
6425     /* sort sets from the smallest to largest, this will improve our 
6426      * algorithm's performance */ 
6427     qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
); 
6429     dstobj 
= createZsetObject(); 
6430     dstzset 
= dstobj
->ptr
; 
6432     if (op 
== REDIS_OP_INTER
) { 
6433         /* skip going over all entries if the smallest zset is NULL or empty */ 
6434         if (src
[0].dict 
&& dictSize(src
[0].dict
) > 0) { 
6435             /* precondition: as src[0].dict is non-empty and the zsets are ordered 
6436              * from small to large, all src[i > 0].dict are non-empty too */ 
6437             di 
= dictGetIterator(src
[0].dict
); 
6438             while((de 
= dictNext(di
)) != NULL
) { 
6439                 double *score 
= zmalloc(sizeof(double)), value
; 
6440                 *score 
= src
[0].weight 
* zunionInterDictValue(de
); 
6442                 for (j 
= 1; j 
< setnum
; j
++) { 
6443                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
6445                         value 
= src
[j
].weight 
* zunionInterDictValue(other
); 
6446                         zunionInterAggregate(score
, value
, aggregate
); 
6452                 /* skip entry when not present in every source dict */ 
6456                     robj 
*o 
= dictGetEntryKey(de
); 
6457                     dictAdd(dstzset
->dict
,o
,score
); 
6458                     incrRefCount(o
); /* added to dictionary */ 
6459                     zslInsert(dstzset
->zsl
,*score
,o
); 
6460                     incrRefCount(o
); /* added to skiplist */ 
6463             dictReleaseIterator(di
); 
6465     } else if (op 
== REDIS_OP_UNION
) { 
6466         for (i 
= 0; i 
< setnum
; i
++) { 
6467             if (!src
[i
].dict
) continue; 
6469             di 
= dictGetIterator(src
[i
].dict
); 
6470             while((de 
= dictNext(di
)) != NULL
) { 
6471                 /* skip key when already processed */ 
6472                 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue; 
6474                 double *score 
= zmalloc(sizeof(double)), value
; 
6475                 *score 
= src
[i
].weight 
* zunionInterDictValue(de
); 
6477                 /* because the zsets are sorted by size, it's only possible
6478                  * for sets at larger indices to hold this entry */ 
6479                 for (j 
= (i
+1); j 
< setnum
; j
++) { 
6480                     dictEntry 
*other 
= dictFind(src
[j
].dict
,dictGetEntryKey(de
)); 
6482                         value 
= src
[j
].weight 
* zunionInterDictValue(other
); 
6483                         zunionInterAggregate(score
, value
, aggregate
); 
6487                 robj 
*o 
= dictGetEntryKey(de
); 
6488                 dictAdd(dstzset
->dict
,o
,score
); 
6489                 incrRefCount(o
); /* added to dictionary */ 
6490                 zslInsert(dstzset
->zsl
,*score
,o
); 
6491                 incrRefCount(o
); /* added to skiplist */ 
6493             dictReleaseIterator(di
); 
6496         /* unknown operator */ 
6497         redisAssert(op 
== REDIS_OP_INTER 
|| op 
== REDIS_OP_UNION
); 
6500     dbDelete(c
->db
,dstkey
); 
6501     if (dstzset
->zsl
->length
) { 
6502         dbAdd(c
->db
,dstkey
,dstobj
); 
6503         addReplyLongLong(c
, dstzset
->zsl
->length
); 
6506         decrRefCount(dstobj
); 
6507         addReply(c
, shared
.czero
); 
6512 static void zunionstoreCommand(redisClient 
*c
) { 
6513     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
); 
6516 static void zinterstoreCommand(redisClient 
*c
) { 
6517     zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
); 
6520 static void zrangeGenericCommand(redisClient 
*c
, int reverse
) { 
6532     if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) || 
6533         (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return; 
6535     if (c
->argc 
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) { 
6537     } else if (c
->argc 
>= 5) { 
6538         addReply(c
,shared
.syntaxerr
); 
6542     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
6543          || checkType(c
,o
,REDIS_ZSET
)) return; 
6548     /* convert negative indexes */ 
6549     if (start 
< 0) start 
= llen
+start
; 
6550     if (end 
< 0) end 
= llen
+end
; 
6551     if (start 
< 0) start 
= 0; 
6552     if (end 
< 0) end 
= 0; 
6554     /* indexes sanity checks */ 
6555     if (start 
> end 
|| start 
>= llen
) { 
6556         /* Out of range start or start > end result in empty list */ 
6557         addReply(c
,shared
.emptymultibulk
); 
6560     if (end 
>= llen
) end 
= llen
-1; 
6561     rangelen 
= (end
-start
)+1; 
6563     /* check if starting point is trivial, before searching 
6564      * the element in log(N) time */ 
6566         ln 
= start 
== 0 ? zsl
->tail 
: zslistTypeGetElementByRank(zsl
, llen
-start
); 
6569             zsl
->header
->forward
[0] : zslistTypeGetElementByRank(zsl
, start
+1); 
6572     /* Return the result in form of a multi-bulk reply */ 
6573     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n", 
6574         withscores 
? (rangelen
*2) : rangelen
)); 
6575     for (j 
= 0; j 
< rangelen
; j
++) { 
6577         addReplyBulk(c
,ele
); 
6579             addReplyDouble(c
,ln
->score
); 
6580         ln 
= reverse 
? ln
->backward 
: ln
->forward
[0]; 
6584 static void zrangeCommand(redisClient 
*c
) { 
6585     zrangeGenericCommand(c
,0); 
6588 static void zrevrangeCommand(redisClient 
*c
) { 
6589     zrangeGenericCommand(c
,1); 
6592 /* This command implements both ZRANGEBYSCORE and ZCOUNT. 
6593  * If justcount is non-zero, just the count is returned. */ 
6594 static void genericZrangebyscoreCommand(redisClient 
*c
, int justcount
) { 
6597     int minex 
= 0, maxex 
= 0; /* are min or max exclusive? */ 
6598     int offset 
= 0, limit 
= -1; 
6602     /* Parse the min-max interval. If one of the values is prefixed 
6603      * by the "(" character, it's considered "open". For instance 
6604      * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max 
6605      * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */ 
6606     if (((char*)c
->argv
[2]->ptr
)[0] == '(') { 
6607         min 
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
); 
6610         min 
= strtod(c
->argv
[2]->ptr
,NULL
); 
6612     if (((char*)c
->argv
[3]->ptr
)[0] == '(') { 
6613         max 
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
); 
6616         max 
= strtod(c
->argv
[3]->ptr
,NULL
); 
6619     /* Parse "WITHSCORES": note that if the command was called with 
6620      * the name ZCOUNT then we are sure that c->argc == 4, so we'll never 
6621      * enter the following paths to parse WITHSCORES and LIMIT. */ 
6622     if (c
->argc 
== 5 || c
->argc 
== 8) { 
6623         if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0) 
6628     if (c
->argc 
!= (4 + withscores
) && c
->argc 
!= (7 + withscores
)) 
6632             sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n")); 
6637     if (c
->argc 
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) { 
6638         addReply(c
,shared
.syntaxerr
); 
6640     } else if (c
->argc 
== (7 + withscores
)) { 
6641         offset 
= atoi(c
->argv
[5]->ptr
); 
6642         limit 
= atoi(c
->argv
[6]->ptr
); 
6643         if (offset 
< 0) offset 
= 0; 
6646     /* Ok, lookup the key and get the range */ 
6647     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
6649         addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6651         if (o
->type 
!= REDIS_ZSET
) { 
6652             addReply(c
,shared
.wrongtypeerr
); 
6654             zset 
*zsetobj 
= o
->ptr
; 
6655             zskiplist 
*zsl 
= zsetobj
->zsl
; 
6657             robj 
*ele
, *lenobj 
= NULL
; 
6658             unsigned long rangelen 
= 0; 
6660             /* Get the first node with the score >= min, or with 
6661              * score > min if 'minex' is true. */ 
6662             ln 
= zslFirstWithScore(zsl
,min
); 
6663             while (minex 
&& ln 
&& ln
->score 
== min
) ln 
= ln
->forward
[0]; 
6666                 /* No element matching the speciifed interval */ 
6667                 addReply(c
,justcount 
? shared
.czero 
: shared
.emptymultibulk
); 
6671             /* We don't know in advance how many matching elements there 
6672              * are in the list, so we push this object that will represent 
6673              * the multi-bulk length in the output buffer, and will "fix" 
6676                 lenobj 
= createObject(REDIS_STRING
,NULL
); 
6678                 decrRefCount(lenobj
); 
6681             while(ln 
&& (maxex 
? (ln
->score 
< max
) : (ln
->score 
<= max
))) { 
6684                     ln 
= ln
->forward
[0]; 
6687                 if (limit 
== 0) break; 
6690                     addReplyBulk(c
,ele
); 
6692                         addReplyDouble(c
,ln
->score
); 
6694                 ln 
= ln
->forward
[0]; 
6696                 if (limit 
> 0) limit
--; 
6699                 addReplyLongLong(c
,(long)rangelen
); 
6701                 lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n", 
6702                      withscores 
? (rangelen
*2) : rangelen
); 
6708 static void zrangebyscoreCommand(redisClient 
*c
) { 
6709     genericZrangebyscoreCommand(c
,0); 
6712 static void zcountCommand(redisClient 
*c
) { 
6713     genericZrangebyscoreCommand(c
,1); 
6716 static void zcardCommand(redisClient 
*c
) { 
6720     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
6721         checkType(c
,o
,REDIS_ZSET
)) return; 
6724     addReplyUlong(c
,zs
->zsl
->length
); 
6727 static void zscoreCommand(redisClient 
*c
) { 
6732     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6733         checkType(c
,o
,REDIS_ZSET
)) return; 
6736     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6738         addReply(c
,shared
.nullbulk
); 
6740         double *score 
= dictGetEntryVal(de
); 
6742         addReplyDouble(c
,*score
); 
6746 static void zrankGenericCommand(redisClient 
*c
, int reverse
) { 
6754     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
6755         checkType(c
,o
,REDIS_ZSET
)) return; 
6759     de 
= dictFind(zs
->dict
,c
->argv
[2]); 
6761         addReply(c
,shared
.nullbulk
); 
6765     score 
= dictGetEntryVal(de
); 
6766     rank 
= zslistTypeGetRank(zsl
, *score
, c
->argv
[2]); 
6769             addReplyLongLong(c
, zsl
->length 
- rank
); 
6771             addReplyLongLong(c
, rank
-1); 
6774         addReply(c
,shared
.nullbulk
); 
6778 static void zrankCommand(redisClient 
*c
) { 
6779     zrankGenericCommand(c
, 0); 
6782 static void zrevrankCommand(redisClient 
*c
) { 
6783     zrankGenericCommand(c
, 1); 
6786 /* ========================= Hashes utility functions ======================= */ 
6787 #define REDIS_HASH_KEY 1 
6788 #define REDIS_HASH_VALUE 2 
6790 /* Check the length of a number of objects to see if we need to convert a 
6791  * zipmap to a real hash. Note that we only check string encoded objects 
6792  * as their string length can be queried in constant time. */ 
6793 static void hashTypeTryConversion(robj 
*subject
, robj 
**argv
, int start
, int end
) { 
6795     if (subject
->encoding 
!= REDIS_ENCODING_ZIPMAP
) return; 
6797     for (i 
= start
; i 
<= end
; i
++) { 
6798         if (argv
[i
]->encoding 
== REDIS_ENCODING_RAW 
&& 
6799             sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) 
6801             convertToRealHash(subject
); 
6807 /* Encode given objects in-place when the hash uses a dict. */ 
6808 static void hashTypeTryObjectEncoding(robj 
*subject
, robj 
**o1
, robj 
**o2
) { 
6809     if (subject
->encoding 
== REDIS_ENCODING_HT
) { 
6810         if (o1
) *o1 
= tryObjectEncoding(*o1
); 
6811         if (o2
) *o2 
= tryObjectEncoding(*o2
); 
6815 /* Get the value from a hash identified by key. Returns either a string 
6816  * object or NULL if the value cannot be found. The refcount of the object 
6817  * is always increased by 1 when the value was found. */ 
6818 static robj 
*hashTypeGet(robj 
*o
, robj 
*key
) { 
6820     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6823         key 
= getDecodedObject(key
); 
6824         if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) { 
6825             value 
= createStringObject((char*)v
,vlen
); 
6829         dictEntry 
*de 
= dictFind(o
->ptr
,key
); 
6831             value 
= dictGetEntryVal(de
); 
6832             incrRefCount(value
); 
6838 /* Test if the key exists in the given hash. Returns 1 if the key 
6839  * exists and 0 when it doesn't. */ 
6840 static int hashTypeExists(robj 
*o
, robj 
*key
) { 
6841     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6842         key 
= getDecodedObject(key
); 
6843         if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) { 
6849         if (dictFind(o
->ptr
,key
) != NULL
) { 
6856 /* Add an element, discard the old if the key already exists. 
6857  * Return 0 on insert and 1 on update. */ 
6858 static int hashTypeSet(robj 
*o
, robj 
*key
, robj 
*value
) { 
6860     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6861         key 
= getDecodedObject(key
); 
6862         value 
= getDecodedObject(value
); 
6863         o
->ptr 
= zipmapSet(o
->ptr
, 
6864             key
->ptr
,sdslen(key
->ptr
), 
6865             value
->ptr
,sdslen(value
->ptr
), &update
); 
6867         decrRefCount(value
); 
6869         /* Check if the zipmap needs to be upgraded to a real hash table */ 
6870         if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
) 
6871             convertToRealHash(o
); 
6873         if (dictReplace(o
->ptr
,key
,value
)) { 
6880         incrRefCount(value
); 
6885 /* Delete an element from a hash. 
6886  * Return 1 on deleted and 0 on not found. */ 
6887 static int hashTypeDelete(robj 
*o
, robj 
*key
) { 
6889     if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6890         key 
= getDecodedObject(key
); 
6891         o
->ptr 
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
); 
6894         deleted 
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
; 
6895         /* Always check if the dictionary needs a resize after a delete. */ 
6896         if (deleted 
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
); 
6901 /* Return the number of elements in a hash. */ 
6902 static unsigned long hashTypeLength(robj 
*o
) { 
6903     return (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) ? 
6904         zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
); 
6907 /* Structure to hold hash iteration abstration. Note that iteration over 
6908  * hashes involves both fields and values. Because it is possible that 
6909  * not both are required, store pointers in the iterator to avoid 
6910  * unnecessary memory allocation for fields/values. */ 
6914     unsigned char *zk
, *zv
; 
6915     unsigned int zklen
, zvlen
; 
6921 static hashTypeIterator 
*hashTypeInitIterator(robj 
*subject
) { 
6922     hashTypeIterator 
*hi 
= zmalloc(sizeof(hashTypeIterator
)); 
6923     hi
->encoding 
= subject
->encoding
; 
6924     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6925         hi
->zi 
= zipmapRewind(subject
->ptr
); 
6926     } else if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6927         hi
->di 
= dictGetIterator(subject
->ptr
); 
6934 static void hashTypeReleaseIterator(hashTypeIterator 
*hi
) { 
6935     if (hi
->encoding 
== REDIS_ENCODING_HT
) { 
6936         dictReleaseIterator(hi
->di
); 
6941 /* Move to the next entry in the hash. Return REDIS_OK when the next entry 
6942  * could be found and REDIS_ERR when the iterator reaches the end. */ 
6943 static int hashTypeNext(hashTypeIterator 
*hi
) { 
6944     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6945         if ((hi
->zi 
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
, 
6946             &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
; 
6948         if ((hi
->de 
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
; 
6953 /* Get key or value object at current iteration position. 
6954  * This increases the refcount of the field object by 1. */ 
6955 static robj 
*hashTypeCurrent(hashTypeIterator 
*hi
, int what
) { 
6957     if (hi
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
6958         if (what 
& REDIS_HASH_KEY
) { 
6959             o 
= createStringObject((char*)hi
->zk
,hi
->zklen
); 
6961             o 
= createStringObject((char*)hi
->zv
,hi
->zvlen
); 
6964         if (what 
& REDIS_HASH_KEY
) { 
6965             o 
= dictGetEntryKey(hi
->de
); 
6967             o 
= dictGetEntryVal(hi
->de
); 
6974 static robj 
*hashTypeLookupWriteOrCreate(redisClient 
*c
, robj 
*key
) { 
6975     robj 
*o 
= lookupKeyWrite(c
->db
,key
); 
6977         o 
= createHashObject(); 
6980         if (o
->type 
!= REDIS_HASH
) { 
6981             addReply(c
,shared
.wrongtypeerr
); 
6988 /* ============================= Hash commands ============================== */ 
6989 static void hsetCommand(redisClient 
*c
) { 
6993     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
6994     hashTypeTryConversion(o
,c
->argv
,2,3); 
6995     hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
6996     update 
= hashTypeSet(o
,c
->argv
[2],c
->argv
[3]); 
6997     addReply(c
, update 
? shared
.czero 
: shared
.cone
); 
7001 static void hsetnxCommand(redisClient 
*c
) { 
7003     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7004     hashTypeTryConversion(o
,c
->argv
,2,3); 
7006     if (hashTypeExists(o
, c
->argv
[2])) { 
7007         addReply(c
, shared
.czero
); 
7009         hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]); 
7010         hashTypeSet(o
,c
->argv
[2],c
->argv
[3]); 
7011         addReply(c
, shared
.cone
); 
7016 static void hmsetCommand(redisClient 
*c
) { 
7020     if ((c
->argc 
% 2) == 1) { 
7021         addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n")); 
7025     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7026     hashTypeTryConversion(o
,c
->argv
,2,c
->argc
-1); 
7027     for (i 
= 2; i 
< c
->argc
; i 
+= 2) { 
7028         hashTypeTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]); 
7029         hashTypeSet(o
,c
->argv
[i
],c
->argv
[i
+1]); 
7031     addReply(c
, shared
.ok
); 
7035 static void hincrbyCommand(redisClient 
*c
) { 
7036     long long value
, incr
; 
7037     robj 
*o
, *current
, *new; 
7039     if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return; 
7040     if ((o 
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return; 
7041     if ((current 
= hashTypeGet(o
,c
->argv
[2])) != NULL
) { 
7042         if (getLongLongFromObjectOrReply(c
,current
,&value
, 
7043             "hash value is not an integer") != REDIS_OK
) { 
7044             decrRefCount(current
); 
7047         decrRefCount(current
); 
7053     new = createStringObjectFromLongLong(value
); 
7054     hashTypeTryObjectEncoding(o
,&c
->argv
[2],NULL
); 
7055     hashTypeSet(o
,c
->argv
[2],new); 
7057     addReplyLongLong(c
,value
); 
7061 static void hgetCommand(redisClient 
*c
) { 
7063     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL 
|| 
7064         checkType(c
,o
,REDIS_HASH
)) return; 
7066     if ((value 
= hashTypeGet(o
,c
->argv
[2])) != NULL
) { 
7067         addReplyBulk(c
,value
); 
7068         decrRefCount(value
); 
7070         addReply(c
,shared
.nullbulk
); 
7074 static void hmgetCommand(redisClient 
*c
) { 
7077     o 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
7078     if (o 
!= NULL 
&& o
->type 
!= REDIS_HASH
) { 
7079         addReply(c
,shared
.wrongtypeerr
); 
7082     /* Note the check for o != NULL happens inside the loop. This is 
7083      * done because objects that cannot be found are considered to be 
7084      * an empty hash. The reply should then be a series of NULLs. */ 
7085     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2)); 
7086     for (i 
= 2; i 
< c
->argc
; i
++) { 
7087         if (o 
!= NULL 
&& (value 
= hashTypeGet(o
,c
->argv
[i
])) != NULL
) { 
7088             addReplyBulk(c
,value
); 
7089             decrRefCount(value
); 
7091             addReply(c
,shared
.nullbulk
); 
7096 static void hdelCommand(redisClient 
*c
) { 
7098     if ((o 
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7099         checkType(c
,o
,REDIS_HASH
)) return; 
7101     if (hashTypeDelete(o
,c
->argv
[2])) { 
7102         if (hashTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]); 
7103         addReply(c
,shared
.cone
); 
7106         addReply(c
,shared
.czero
); 
7110 static void hlenCommand(redisClient 
*c
) { 
7112     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7113         checkType(c
,o
,REDIS_HASH
)) return; 
7115     addReplyUlong(c
,hashTypeLength(o
)); 
7118 static void genericHgetallCommand(redisClient 
*c
, int flags
) { 
7119     robj 
*o
, *lenobj
, *obj
; 
7120     unsigned long count 
= 0; 
7121     hashTypeIterator 
*hi
; 
7123     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
 
7124         || checkType(c
,o
,REDIS_HASH
)) return; 
7126     lenobj 
= createObject(REDIS_STRING
,NULL
); 
7128     decrRefCount(lenobj
); 
7130     hi 
= hashTypeInitIterator(o
); 
7131     while (hashTypeNext(hi
) != REDIS_ERR
) { 
7132         if (flags 
& REDIS_HASH_KEY
) { 
7133             obj 
= hashTypeCurrent(hi
,REDIS_HASH_KEY
); 
7134             addReplyBulk(c
,obj
); 
7138         if (flags 
& REDIS_HASH_VALUE
) { 
7139             obj 
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
); 
7140             addReplyBulk(c
,obj
); 
7145     hashTypeReleaseIterator(hi
); 
7147     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%lu\r\n",count
); 
7150 static void hkeysCommand(redisClient 
*c
) { 
7151     genericHgetallCommand(c
,REDIS_HASH_KEY
); 
7154 static void hvalsCommand(redisClient 
*c
) { 
7155     genericHgetallCommand(c
,REDIS_HASH_VALUE
); 
7158 static void hgetallCommand(redisClient 
*c
) { 
7159     genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
); 
7162 static void hexistsCommand(redisClient 
*c
) { 
7164     if ((o 
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL 
|| 
7165         checkType(c
,o
,REDIS_HASH
)) return; 
7167     addReply(c
, hashTypeExists(o
,c
->argv
[2]) ? shared
.cone 
: shared
.czero
); 
7170 static void convertToRealHash(robj 
*o
) { 
7171     unsigned char *key
, *val
, *p
, *zm 
= o
->ptr
; 
7172     unsigned int klen
, vlen
; 
7173     dict 
*dict 
= dictCreate(&hashDictType
,NULL
); 
7175     assert(o
->type 
== REDIS_HASH 
&& o
->encoding 
!= REDIS_ENCODING_HT
); 
7176     p 
= zipmapRewind(zm
); 
7177     while((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) { 
7178         robj 
*keyobj
, *valobj
; 
7180         keyobj 
= createStringObject((char*)key
,klen
); 
7181         valobj 
= createStringObject((char*)val
,vlen
); 
7182         keyobj 
= tryObjectEncoding(keyobj
); 
7183         valobj 
= tryObjectEncoding(valobj
); 
7184         dictAdd(dict
,keyobj
,valobj
); 
7186     o
->encoding 
= REDIS_ENCODING_HT
; 
7191 /* ========================= Non type-specific commands  ==================== */ 
7193 static void flushdbCommand(redisClient 
*c
) { 
7194     server
.dirty 
+= dictSize(c
->db
->dict
); 
7195     touchWatchedKeysOnFlush(c
->db
->id
); 
7196     dictEmpty(c
->db
->dict
); 
7197     dictEmpty(c
->db
->expires
); 
7198     addReply(c
,shared
.ok
); 
7201 static void flushallCommand(redisClient 
*c
) { 
7202     touchWatchedKeysOnFlush(-1); 
7203     server
.dirty 
+= emptyDb(); 
7204     addReply(c
,shared
.ok
); 
7205     if (server
.bgsavechildpid 
!= -1) { 
7206         kill(server
.bgsavechildpid
,SIGKILL
); 
7207         rdbRemoveTempFile(server
.bgsavechildpid
); 
7209     rdbSave(server
.dbfilename
); 
7213 static redisSortOperation 
*createSortOperation(int type
, robj 
*pattern
) { 
7214     redisSortOperation 
*so 
= zmalloc(sizeof(*so
)); 
7216     so
->pattern 
= pattern
; 
7220 /* Return the value associated to the key with a name obtained 
7221  * substituting the first occurence of '*' in 'pattern' with 'subst'. 
7222  * The returned object will always have its refcount increased by 1 
7223  * when it is non-NULL. */ 
7224 static robj 
*lookupKeyByPattern(redisDb 
*db
, robj 
*pattern
, robj 
*subst
) { 
7227     robj keyobj
, fieldobj
, *o
; 
7228     int prefixlen
, sublen
, postfixlen
, fieldlen
; 
7229     /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */ 
7233         char buf
[REDIS_SORTKEY_MAX
+1]; 
7234     } keyname
, fieldname
; 
7236     /* If the pattern is "#" return the substitution object itself in order 
7237      * to implement the "SORT ... GET #" feature. */ 
7238     spat 
= pattern
->ptr
; 
7239     if (spat
[0] == '#' && spat
[1] == '\0') { 
7240         incrRefCount(subst
); 
7244     /* The substitution object may be specially encoded. If so we create 
7245      * a decoded object on the fly. Otherwise getDecodedObject will just 
7246      * increment the ref count, that we'll decrement later. */ 
7247     subst 
= getDecodedObject(subst
); 
7250     if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
; 
7251     p 
= strchr(spat
,'*'); 
7253         decrRefCount(subst
); 
7257     /* Find out if we're dealing with a hash dereference. */ 
7258     if ((f 
= strstr(p
+1, "->")) != NULL
) { 
7259         fieldlen 
= sdslen(spat
)-(f
-spat
); 
7260         /* this also copies \0 character */ 
7261         memcpy(fieldname
.buf
,f
+2,fieldlen
-1); 
7262         fieldname
.len 
= fieldlen
-2; 
7268     sublen 
= sdslen(ssub
); 
7269     postfixlen 
= sdslen(spat
)-(prefixlen
+1)-fieldlen
; 
7270     memcpy(keyname
.buf
,spat
,prefixlen
); 
7271     memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
); 
7272     memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
); 
7273     keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0'; 
7274     keyname
.len 
= prefixlen
+sublen
+postfixlen
; 
7275     decrRefCount(subst
); 
7277     /* Lookup substituted key */ 
7278     initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2)); 
7279     o 
= lookupKeyRead(db
,&keyobj
); 
7280     if (o 
== NULL
) return NULL
; 
7283         if (o
->type 
!= REDIS_HASH 
|| fieldname
.len 
< 1) return NULL
; 
7285         /* Retrieve value from hash by the field name. This operation 
7286          * already increases the refcount of the returned object. */ 
7287         initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2)); 
7288         o 
= hashTypeGet(o
, &fieldobj
); 
7290         if (o
->type 
!= REDIS_STRING
) return NULL
; 
7292         /* Every object that this function returns needs to have its refcount 
7293          * increased. sortCommand decreases it again. */ 
7300 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with 
7301  * the additional parameter is not standard but a BSD-specific we have to 
7302  * pass sorting parameters via the global 'server' structure */ 
7303 static int sortCompare(const void *s1
, const void *s2
) { 
7304     const redisSortObject 
*so1 
= s1
, *so2 
= s2
; 
7307     if (!server
.sort_alpha
) { 
7308         /* Numeric sorting. Here it's trivial as we precomputed scores */ 
7309         if (so1
->u
.score 
> so2
->u
.score
) { 
7311         } else if (so1
->u
.score 
< so2
->u
.score
) { 
7317         /* Alphanumeric sorting */ 
7318         if (server
.sort_bypattern
) { 
7319             if (!so1
->u
.cmpobj 
|| !so2
->u
.cmpobj
) { 
7320                 /* At least one compare object is NULL */ 
7321                 if (so1
->u
.cmpobj 
== so2
->u
.cmpobj
) 
7323                 else if (so1
->u
.cmpobj 
== NULL
) 
7328                 /* We have both the objects, use strcoll */ 
7329                 cmp 
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
); 
7332             /* Compare elements directly. */ 
7333             cmp 
= compareStringObjects(so1
->obj
,so2
->obj
); 
7336     return server
.sort_desc 
? -cmp 
: cmp
; 
7339 /* The SORT command is the most complex command in Redis. Warning: this code 
7340  * is optimized for speed and a bit less for readability */ 
7341 static void sortCommand(redisClient 
*c
) { 
7343     unsigned int outputlen 
= 0; 
7344     int desc 
= 0, alpha 
= 0; 
7345     int limit_start 
= 0, limit_count 
= -1, start
, end
; 
7346     int j
, dontsort 
= 0, vectorlen
; 
7347     int getop 
= 0; /* GET operation counter */ 
7348     robj 
*sortval
, *sortby 
= NULL
, *storekey 
= NULL
; 
7349     redisSortObject 
*vector
; /* Resulting vector to sort */ 
7351     /* Lookup the key to sort. It must be of the right types */ 
7352     sortval 
= lookupKeyRead(c
->db
,c
->argv
[1]); 
7353     if (sortval 
== NULL
) { 
7354         addReply(c
,shared
.emptymultibulk
); 
7357     if (sortval
->type 
!= REDIS_SET 
&& sortval
->type 
!= REDIS_LIST 
&& 
7358         sortval
->type 
!= REDIS_ZSET
) 
7360         addReply(c
,shared
.wrongtypeerr
); 
7364     /* Create a list of operations to perform for every sorted element. 
7365      * Operations can be GET/DEL/INCR/DECR */ 
7366     operations 
= listCreate(); 
7367     listSetFreeMethod(operations
,zfree
); 
7370     /* Now we need to protect sortval incrementing its count, in the future 
7371      * SORT may have options able to overwrite/delete keys during the sorting 
7372      * and the sorted key itself may get destroyed */ 
7373     incrRefCount(sortval
); 
7375     /* The SORT command has an SQL-alike syntax, parse it */ 
7376     while(j 
< c
->argc
) { 
7377         int leftargs 
= c
->argc
-j
-1; 
7378         if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) { 
7380         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) { 
7382         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) { 
7384         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs 
>= 2) { 
7385             limit_start 
= atoi(c
->argv
[j
+1]->ptr
); 
7386             limit_count 
= atoi(c
->argv
[j
+2]->ptr
); 
7388         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs 
>= 1) { 
7389             storekey 
= c
->argv
[j
+1]; 
7391         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs 
>= 1) { 
7392             sortby 
= c
->argv
[j
+1]; 
7393             /* If the BY pattern does not contain '*', i.e. it is constant, 
7394              * we don't need to sort nor to lookup the weight keys. */ 
7395             if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort 
= 1; 
7397         } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs 
>= 1) { 
7398             listAddNodeTail(operations
,createSortOperation( 
7399                 REDIS_SORT_GET
,c
->argv
[j
+1])); 
7403             decrRefCount(sortval
); 
7404             listRelease(operations
); 
7405             addReply(c
,shared
.syntaxerr
); 
7411     /* Load the sorting vector with all the objects to sort */ 
7412     switch(sortval
->type
) { 
7413     case REDIS_LIST
: vectorlen 
= listTypeLength(sortval
); break; 
7414     case REDIS_SET
: vectorlen 
=  dictSize((dict
*)sortval
->ptr
); break; 
7415     case REDIS_ZSET
: vectorlen 
= dictSize(((zset
*)sortval
->ptr
)->dict
); break; 
7416     default: vectorlen 
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */ 
7418     vector 
= zmalloc(sizeof(redisSortObject
)*vectorlen
); 
7421     if (sortval
->type 
== REDIS_LIST
) { 
7422         listTypeIterator 
*li 
= listTypeInitIterator(sortval
,0,REDIS_TAIL
); 
7423         listTypeEntry entry
; 
7424         while(listTypeNext(li
,&entry
)) { 
7425             vector
[j
].obj 
= listTypeGet(&entry
); 
7426             vector
[j
].u
.score 
= 0; 
7427             vector
[j
].u
.cmpobj 
= NULL
; 
7430         listTypeReleaseIterator(li
); 
7436         if (sortval
->type 
== REDIS_SET
) { 
7439             zset 
*zs 
= sortval
->ptr
; 
7443         di 
= dictGetIterator(set
); 
7444         while((setele 
= dictNext(di
)) != NULL
) { 
7445             vector
[j
].obj 
= dictGetEntryKey(setele
); 
7446             vector
[j
].u
.score 
= 0; 
7447             vector
[j
].u
.cmpobj 
= NULL
; 
7450         dictReleaseIterator(di
); 
7452     redisAssert(j 
== vectorlen
); 
7454     /* Now it's time to load the right scores in the sorting vector */ 
7455     if (dontsort 
== 0) { 
7456         for (j 
= 0; j 
< vectorlen
; j
++) { 
7459                 /* lookup value to sort by */ 
7460                 byval 
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
); 
7461                 if (!byval
) continue; 
7463                 /* use object itself to sort by */ 
7464                 byval 
= vector
[j
].obj
; 
7468                 if (sortby
) vector
[j
].u
.cmpobj 
= getDecodedObject(byval
); 
7470                 if (byval
->encoding 
== REDIS_ENCODING_RAW
) { 
7471                     vector
[j
].u
.score 
= strtod(byval
->ptr
,NULL
); 
7472                 } else if (byval
->encoding 
== REDIS_ENCODING_INT
) { 
7473                     /* Don't need to decode the object if it's 
7474                      * integer-encoded (the only encoding supported) so 
7475                      * far. We can just cast it */ 
7476                     vector
[j
].u
.score 
= (long)byval
->ptr
; 
7478                     redisAssert(1 != 1); 
7482             /* when the object was retrieved using lookupKeyByPattern, 
7483              * its refcount needs to be decreased. */ 
7485                 decrRefCount(byval
); 
7490     /* We are ready to sort the vector... perform a bit of sanity check 
7491      * on the LIMIT option too. We'll use a partial version of quicksort. */ 
7492     start 
= (limit_start 
< 0) ? 0 : limit_start
; 
7493     end 
= (limit_count 
< 0) ? vectorlen
-1 : start
+limit_count
-1; 
7494     if (start 
>= vectorlen
) { 
7495         start 
= vectorlen
-1; 
7498     if (end 
>= vectorlen
) end 
= vectorlen
-1; 
7500     if (dontsort 
== 0) { 
7501         server
.sort_desc 
= desc
; 
7502         server
.sort_alpha 
= alpha
; 
7503         server
.sort_bypattern 
= sortby 
? 1 : 0; 
7504         if (sortby 
&& (start 
!= 0 || end 
!= vectorlen
-1)) 
7505             pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
); 
7507             qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
); 
7510     /* Send command output to the output buffer, performing the specified 
7511      * GET/DEL/INCR/DECR operations if any. */ 
7512     outputlen 
= getop 
? getop
*(end
-start
+1) : end
-start
+1; 
7513     if (storekey 
== NULL
) { 
7514         /* STORE option not specified, sent the sorting result to client */ 
7515         addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
)); 
7516         for (j 
= start
; j 
<= end
; j
++) { 
7520             if (!getop
) addReplyBulk(c
,vector
[j
].obj
); 
7521             listRewind(operations
,&li
); 
7522             while((ln 
= listNext(&li
))) { 
7523                 redisSortOperation 
*sop 
= ln
->value
; 
7524                 robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7527                 if (sop
->type 
== REDIS_SORT_GET
) { 
7529                         addReply(c
,shared
.nullbulk
); 
7531                         addReplyBulk(c
,val
); 
7535                     redisAssert(sop
->type 
== REDIS_SORT_GET
); /* always fails */ 
7540         robj 
*sobj 
= createZiplistObject(); 
7542         /* STORE option specified, set the sorting result as a List object */ 
7543         for (j 
= start
; j 
<= end
; j
++) { 
7548                 listTypePush(sobj
,vector
[j
].obj
,REDIS_TAIL
); 
7550                 listRewind(operations
,&li
); 
7551                 while((ln 
= listNext(&li
))) { 
7552                     redisSortOperation 
*sop 
= ln
->value
; 
7553                     robj 
*val 
= lookupKeyByPattern(c
->db
,sop
->pattern
, 
7556                     if (sop
->type 
== REDIS_SORT_GET
) { 
7557                         if (!val
) val 
= createStringObject("",0); 
7559                         /* listTypePush does an incrRefCount, so we should take care 
7560                          * care of the incremented refcount caused by either 
7561                          * lookupKeyByPattern or createStringObject("",0) */ 
7562                         listTypePush(sobj
,val
,REDIS_TAIL
); 
7566                         redisAssert(sop
->type 
== REDIS_SORT_GET
); 
7571         dbReplace(c
->db
,storekey
,sobj
); 
7572         /* Note: we add 1 because the DB is dirty anyway since even if the 
7573          * SORT result is empty a new key is set and maybe the old content 
7575         server
.dirty 
+= 1+outputlen
; 
7576         addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
)); 
7580     if (sortval
->type 
== REDIS_LIST
) 
7581         for (j 
= 0; j 
< vectorlen
; j
++) 
7582             decrRefCount(vector
[j
].obj
); 
7583     decrRefCount(sortval
); 
7584     listRelease(operations
); 
7585     for (j 
= 0; j 
< vectorlen
; j
++) { 
7586         if (alpha 
&& vector
[j
].u
.cmpobj
) 
7587             decrRefCount(vector
[j
].u
.cmpobj
); 
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer; it must be able to hold at least 22 bytes
 * (a 20 digit byte count, the unit letter and the terminator).
 * 'n' is the number of bytes to format.
 *
 * Values below 1024 are printed as an exact byte count. Larger values are
 * scaled by powers of 1024 and printed with two decimals followed by
 * K, M, G, T or P. Values of 1024 petabytes or more fall back to the plain
 * byte count, so 's' is always written whatever the value of 'n' is. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTP";
    unsigned long long divisor = 1024ULL; /* size of one unit, in bytes */
    size_t i;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    for (i = 0; i < sizeof(units)-1; i++) {
        /* divisor*1024 is the first value too big for this unit. The
         * largest product computed here is 1024^6 (2^60), which fits in
         * an unsigned long long. */
        if (n < divisor*1024ULL) {
            sprintf(s,"%.2f%c",(double)n/(double)divisor,units[i]);
            return;
        }
        divisor *= 1024ULL;
    }
    /* Too big for the units we know about: report the raw byte count
     * instead of leaving the buffer untouched. */
    sprintf(s,"%lluB",n);
}
7613 /* Create the string returned by the INFO command. This is decoupled 
7614  * by the INFO command itself as we need to report the same information 
7615  * on memory corruption problems. */ 
7616 static sds 
genRedisInfoString(void) { 
7618     time_t uptime 
= time(NULL
)-server
.stat_starttime
; 
7622     bytesToHuman(hmem
,zmalloc_used_memory()); 
7623     info 
= sdscatprintf(sdsempty(), 
7624         "redis_version:%s\r\n" 
7625         "redis_git_sha1:%s\r\n" 
7626         "redis_git_dirty:%d\r\n" 
7628         "multiplexing_api:%s\r\n" 
7629         "process_id:%ld\r\n" 
7630         "uptime_in_seconds:%ld\r\n" 
7631         "uptime_in_days:%ld\r\n" 
7632         "connected_clients:%d\r\n" 
7633         "connected_slaves:%d\r\n" 
7634         "blocked_clients:%d\r\n" 
7635         "used_memory:%zu\r\n" 
7636         "used_memory_human:%s\r\n" 
7637         "changes_since_last_save:%lld\r\n" 
7638         "bgsave_in_progress:%d\r\n" 
7639         "last_save_time:%ld\r\n" 
7640         "bgrewriteaof_in_progress:%d\r\n" 
7641         "total_connections_received:%lld\r\n" 
7642         "total_commands_processed:%lld\r\n" 
7643         "expired_keys:%lld\r\n" 
7644         "hash_max_zipmap_entries:%zu\r\n" 
7645         "hash_max_zipmap_value:%zu\r\n" 
7646         "pubsub_channels:%ld\r\n" 
7647         "pubsub_patterns:%u\r\n" 
7652         strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0, 
7653         (sizeof(long) == 8) ? "64" : "32", 
7658         listLength(server
.clients
)-listLength(server
.slaves
), 
7659         listLength(server
.slaves
), 
7660         server
.blpop_blocked_clients
, 
7661         zmalloc_used_memory(), 
7664         server
.bgsavechildpid 
!= -1, 
7666         server
.bgrewritechildpid 
!= -1, 
7667         server
.stat_numconnections
, 
7668         server
.stat_numcommands
, 
7669         server
.stat_expiredkeys
, 
7670         server
.hash_max_zipmap_entries
, 
7671         server
.hash_max_zipmap_value
, 
7672         dictSize(server
.pubsub_channels
), 
7673         listLength(server
.pubsub_patterns
), 
7674         server
.vm_enabled 
!= 0, 
7675         server
.masterhost 
== NULL 
? "master" : "slave" 
7677     if (server
.masterhost
) { 
7678         info 
= sdscatprintf(info
, 
7679             "master_host:%s\r\n" 
7680             "master_port:%d\r\n" 
7681             "master_link_status:%s\r\n" 
7682             "master_last_io_seconds_ago:%d\r\n" 
7685             (server
.replstate 
== REDIS_REPL_CONNECTED
) ? 
7687             server
.master 
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1 
7690     if (server
.vm_enabled
) { 
7692         info 
= sdscatprintf(info
, 
7693             "vm_conf_max_memory:%llu\r\n" 
7694             "vm_conf_page_size:%llu\r\n" 
7695             "vm_conf_pages:%llu\r\n" 
7696             "vm_stats_used_pages:%llu\r\n" 
7697             "vm_stats_swapped_objects:%llu\r\n" 
7698             "vm_stats_swappin_count:%llu\r\n" 
7699             "vm_stats_swappout_count:%llu\r\n" 
7700             "vm_stats_io_newjobs_len:%lu\r\n" 
7701             "vm_stats_io_processing_len:%lu\r\n" 
7702             "vm_stats_io_processed_len:%lu\r\n" 
7703             "vm_stats_io_active_threads:%lu\r\n" 
7704             "vm_stats_blocked_clients:%lu\r\n" 
7705             ,(unsigned long long) server
.vm_max_memory
, 
7706             (unsigned long long) server
.vm_page_size
, 
7707             (unsigned long long) server
.vm_pages
, 
7708             (unsigned long long) server
.vm_stats_used_pages
, 
7709             (unsigned long long) server
.vm_stats_swapped_objects
, 
7710             (unsigned long long) server
.vm_stats_swapins
, 
7711             (unsigned long long) server
.vm_stats_swapouts
, 
7712             (unsigned long) listLength(server
.io_newjobs
), 
7713             (unsigned long) listLength(server
.io_processing
), 
7714             (unsigned long) listLength(server
.io_processed
), 
7715             (unsigned long) server
.io_active_threads
, 
7716             (unsigned long) server
.vm_blocked_clients
 
7720     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
7721         long long keys
, vkeys
; 
7723         keys 
= dictSize(server
.db
[j
].dict
); 
7724         vkeys 
= dictSize(server
.db
[j
].expires
); 
7725         if (keys 
|| vkeys
) { 
7726             info 
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n", 
7733 static void infoCommand(redisClient 
*c
) { 
7734     sds info 
= genRedisInfoString(); 
7735     addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n", 
7736         (unsigned long)sdslen(info
))); 
7737     addReplySds(c
,info
); 
7738     addReply(c
,shared
.crlf
); 
7741 static void monitorCommand(redisClient 
*c
) { 
7742     /* ignore MONITOR if aleady slave or in monitor mode */ 
7743     if (c
->flags 
& REDIS_SLAVE
) return; 
7745     c
->flags 
|= (REDIS_SLAVE
|REDIS_MONITOR
); 
7747     listAddNodeTail(server
.monitors
,c
); 
7748     addReply(c
,shared
.ok
); 
7751 /* ================================= Expire ================================= */ 
7752 static int removeExpire(redisDb 
*db
, robj 
*key
) { 
7753     if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) { 
7760 static int setExpire(redisDb 
*db
, robj 
*key
, time_t when
) { 
7761     sds copy 
= sdsdup(key
->ptr
); 
7762     if (dictAdd(db
->expires
,copy
,(void*)when
) == DICT_ERR
) { 
7770 /* Return the expire time of the specified key, or -1 if no expire 
7771  * is associated with this key (i.e. the key is non volatile) */ 
7772 static time_t getExpire(redisDb 
*db
, robj 
*key
) { 
7775     /* No expire? return ASAP */ 
7776     if (dictSize(db
->expires
) == 0 || 
7777        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1; 
7779     return (time_t) dictGetEntryVal(de
); 
7782 static int expireIfNeeded(redisDb 
*db
, robj 
*key
) { 
7786     /* No expire? return ASAP */ 
7787     if (dictSize(db
->expires
) == 0 || 
7788        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0; 
7790     /* Lookup the expire */ 
7791     when 
= (time_t) dictGetEntryVal(de
); 
7792     if (time(NULL
) <= when
) return 0; 
7794     /* Delete the key */ 
7796     server
.stat_expiredkeys
++; 
7800 static int deleteIfVolatile(redisDb 
*db
, robj 
*key
) { 
7803     /* No expire? return ASAP */ 
7804     if (dictSize(db
->expires
) == 0 || 
7805        (de 
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0; 
7807     /* Delete the key */ 
7809     server
.stat_expiredkeys
++; 
7810     dictDelete(db
->expires
,key
->ptr
); 
7811     return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
; 
7814 static void expireGenericCommand(redisClient 
*c
, robj 
*key
, robj 
*param
, long offset
) { 
7818     if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return; 
7822     de 
= dictFind(c
->db
->dict
,key
->ptr
); 
7824         addReply(c
,shared
.czero
); 
7828         if (dbDelete(c
->db
,key
)) server
.dirty
++; 
7829         addReply(c
, shared
.cone
); 
7832         time_t when 
= time(NULL
)+seconds
; 
7833         if (setExpire(c
->db
,key
,when
)) { 
7834             addReply(c
,shared
.cone
); 
7837             addReply(c
,shared
.czero
); 
7843 static void expireCommand(redisClient 
*c
) { 
7844     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0); 
7847 static void expireatCommand(redisClient 
*c
) { 
7848     expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
)); 
7851 static void ttlCommand(redisClient 
*c
) { 
7855     expire 
= getExpire(c
->db
,c
->argv
[1]); 
7857         ttl 
= (int) (expire
-time(NULL
)); 
7858         if (ttl 
< 0) ttl 
= -1; 
7860     addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
)); 
7863 /* ================================ MULTI/EXEC ============================== */ 
7865 /* Client state initialization for MULTI/EXEC */ 
7866 static void initClientMultiState(redisClient 
*c
) { 
7867     c
->mstate
.commands 
= NULL
; 
7868     c
->mstate
.count 
= 0; 
7871 /* Release all the resources associated with MULTI/EXEC state */ 
7872 static void freeClientMultiState(redisClient 
*c
) { 
7875     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7877         multiCmd 
*mc 
= c
->mstate
.commands
+j
; 
7879         for (i 
= 0; i 
< mc
->argc
; i
++) 
7880             decrRefCount(mc
->argv
[i
]); 
7883     zfree(c
->mstate
.commands
); 
7886 /* Add a new command into the MULTI commands queue */ 
7887 static void queueMultiCommand(redisClient 
*c
, struct redisCommand 
*cmd
) { 
7891     c
->mstate
.commands 
= zrealloc(c
->mstate
.commands
, 
7892             sizeof(multiCmd
)*(c
->mstate
.count
+1)); 
7893     mc 
= c
->mstate
.commands
+c
->mstate
.count
; 
7896     mc
->argv 
= zmalloc(sizeof(robj
*)*c
->argc
); 
7897     memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
); 
7898     for (j 
= 0; j 
< c
->argc
; j
++) 
7899         incrRefCount(mc
->argv
[j
]); 
7903 static void multiCommand(redisClient 
*c
) { 
7904     if (c
->flags 
& REDIS_MULTI
) { 
7905         addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n")); 
7908     c
->flags 
|= REDIS_MULTI
; 
7909     addReply(c
,shared
.ok
); 
7912 static void discardCommand(redisClient 
*c
) { 
7913     if (!(c
->flags 
& REDIS_MULTI
)) { 
7914         addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n")); 
7918     freeClientMultiState(c
); 
7919     initClientMultiState(c
); 
7920     c
->flags 
&= (~REDIS_MULTI
); 
7922     addReply(c
,shared
.ok
); 
7925 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand 
7926  * implememntation for more information. */ 
7927 static void execCommandReplicateMulti(redisClient 
*c
) { 
7928     struct redisCommand 
*cmd
; 
7929     robj 
*multistring 
= createStringObject("MULTI",5); 
7931     cmd 
= lookupCommand("multi"); 
7932     if (server
.appendonly
) 
7933         feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1); 
7934     if (listLength(server
.slaves
)) 
7935         replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1); 
7936     decrRefCount(multistring
); 
7939 static void execCommand(redisClient 
*c
) { 
7944     if (!(c
->flags 
& REDIS_MULTI
)) { 
7945         addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n")); 
7949     /* Check if we need to abort the EXEC if some WATCHed key was touched. 
7950      * A failed EXEC will return a multi bulk nil object. */ 
7951     if (c
->flags 
& REDIS_DIRTY_CAS
) { 
7952         freeClientMultiState(c
); 
7953         initClientMultiState(c
); 
7954         c
->flags 
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
); 
7956         addReply(c
,shared
.nullmultibulk
); 
7960     /* Replicate a MULTI request now that we are sure the block is executed. 
7961      * This way we'll deliver the MULTI/..../EXEC block as a whole and 
7962      * both the AOF and the replication link will have the same consistency 
7963      * and atomicity guarantees. */ 
7964     execCommandReplicateMulti(c
); 
7966     /* Exec all the queued commands */ 
7967     unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */ 
7968     orig_argv 
= c
->argv
; 
7969     orig_argc 
= c
->argc
; 
7970     addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
)); 
7971     for (j 
= 0; j 
< c
->mstate
.count
; j
++) { 
7972         c
->argc 
= c
->mstate
.commands
[j
].argc
; 
7973         c
->argv 
= c
->mstate
.commands
[j
].argv
; 
7974         call(c
,c
->mstate
.commands
[j
].cmd
); 
7976     c
->argv 
= orig_argv
; 
7977     c
->argc 
= orig_argc
; 
7978     freeClientMultiState(c
); 
7979     initClientMultiState(c
); 
7980     c
->flags 
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
); 
7981     /* Make sure the EXEC command is always replicated / AOF, since we 
7982      * always send the MULTI command (we can't know beforehand if the 
7983      * next operations will contain at least a modification to the DB). */ 
7987 /* =========================== Blocking Operations  ========================= */ 
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already
 * be used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are
 * implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as example:
 * - If the user calls BLPOP and the key exists and contains a non empty
 *   list then LPOP is called instead. So BLPOP is semantically the same as
 *   LPOP if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification
 *   for new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blocking_keys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to
 * understand the implementation and modify / fix it later.
 */
8018 /* Set a client in blocking mode for the specified key, with the specified 
8020 static void blockForKeys(redisClient 
*c
, robj 
**keys
, int numkeys
, time_t timeout
) { 
8025     c
->blocking_keys 
= zmalloc(sizeof(robj
*)*numkeys
); 
8026     c
->blocking_keys_num 
= numkeys
; 
8027     c
->blockingto 
= timeout
; 
8028     for (j 
= 0; j 
< numkeys
; j
++) { 
8029         /* Add the key in the client structure, to map clients -> keys */ 
8030         c
->blocking_keys
[j
] = keys
[j
]; 
8031         incrRefCount(keys
[j
]); 
8033         /* And in the other "side", to map keys -> clients */ 
8034         de 
= dictFind(c
->db
->blocking_keys
,keys
[j
]); 
8038             /* For every key we take a list of clients blocked for it */ 
8040             retval 
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
); 
8041             incrRefCount(keys
[j
]); 
8042             assert(retval 
== DICT_OK
); 
8044             l 
= dictGetEntryVal(de
); 
8046         listAddNodeTail(l
,c
); 
8048     /* Mark the client as a blocked client */ 
8049     c
->flags 
|= REDIS_BLOCKED
; 
8050     server
.blpop_blocked_clients
++; 
8053 /* Unblock a client that's waiting in a blocking operation such as BLPOP */ 
8054 static void unblockClientWaitingData(redisClient 
*c
) { 
8059     assert(c
->blocking_keys 
!= NULL
); 
8060     /* The client may wait for multiple keys, so unblock it for every key. */ 
8061     for (j 
= 0; j 
< c
->blocking_keys_num
; j
++) { 
8062         /* Remove this client from the list of clients waiting for this key. */ 
8063         de 
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]); 
8065         l 
= dictGetEntryVal(de
); 
8066         listDelNode(l
,listSearchKey(l
,c
)); 
8067         /* If the list is empty we need to remove it to avoid wasting memory */ 
8068         if (listLength(l
) == 0) 
8069             dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]); 
8070         decrRefCount(c
->blocking_keys
[j
]); 
8072     /* Cleanup the client structure */ 
8073     zfree(c
->blocking_keys
); 
8074     c
->blocking_keys 
= NULL
; 
8075     c
->flags 
&= (~REDIS_BLOCKED
); 
8076     server
.blpop_blocked_clients
--; 
8077     /* We want to process data if there is some command waiting 
8078      * in the input buffer. Note that this is safe even if 
8079      * unblockClientWaitingData() gets called from freeClient() because 
8080      * freeClient() will be smart enough to call this function 
8081      * *after* c->querybuf was set to NULL. */ 
8082     if (c
->querybuf 
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
); 
8085 /* This should be called from any function PUSHing into lists. 
8086  * 'c' is the "pushing client", 'key' is the key it is pushing data against, 
8087  * 'ele' is the element pushed. 
8089  * If the function returns 0 there was no client waiting for a list push 
8092  * If the function returns 1 there was a client waiting for a list push 
8093  * against this key, the element was passed to this client thus it's not 
8094  * needed to actually add it to the list and the caller should return asap. */ 
8095 static int handleClientsWaitingListPush(redisClient 
*c
, robj 
*key
, robj 
*ele
) { 
8096     struct dictEntry 
*de
; 
8097     redisClient 
*receiver
; 
8101     de 
= dictFind(c
->db
->blocking_keys
,key
); 
8102     if (de 
== NULL
) return 0; 
8103     l 
= dictGetEntryVal(de
); 
8106     receiver 
= ln
->value
; 
8108     addReplySds(receiver
,sdsnew("*2\r\n")); 
8109     addReplyBulk(receiver
,key
); 
8110     addReplyBulk(receiver
,ele
); 
8111     unblockClientWaitingData(receiver
); 
8115 /* Blocking RPOP/LPOP */ 
8116 static void blockingPopGenericCommand(redisClient 
*c
, int where
) { 
8121     for (j 
= 1; j 
< c
->argc
-1; j
++) { 
8122         o 
= lookupKeyWrite(c
->db
,c
->argv
[j
]); 
8124             if (o
->type 
!= REDIS_LIST
) { 
8125                 addReply(c
,shared
.wrongtypeerr
); 
8128                 list 
*list 
= o
->ptr
; 
8129                 if (listLength(list
) != 0) { 
8130                     /* If the list contains elements fall back to the usual 
8131                      * non-blocking POP operation */ 
8132                     robj 
*argv
[2], **orig_argv
; 
8135                     /* We need to alter the command arguments before to call 
8136                      * popGenericCommand() as the command takes a single key. */ 
8137                     orig_argv 
= c
->argv
; 
8138                     orig_argc 
= c
->argc
; 
8139                     argv
[1] = c
->argv
[j
]; 
8143                     /* Also the return value is different, we need to output 
8144                      * the multi bulk reply header and the key name. The 
8145                      * "real" command will add the last element (the value) 
8146                      * for us. If this souds like an hack to you it's just 
8147                      * because it is... */ 
8148                     addReplySds(c
,sdsnew("*2\r\n")); 
8149                     addReplyBulk(c
,argv
[1]); 
8150                     popGenericCommand(c
,where
); 
8152                     /* Fix the client structure with the original stuff */ 
8153                     c
->argv 
= orig_argv
; 
8154                     c
->argc 
= orig_argc
; 
8160     /* If the list is empty or the key does not exists we must block */ 
8161     timeout 
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10); 
8162     if (timeout 
> 0) timeout 
+= time(NULL
); 
8163     blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
); 
8166 static void blpopCommand(redisClient 
*c
) { 
8167     blockingPopGenericCommand(c
,REDIS_HEAD
); 
8170 static void brpopCommand(redisClient 
*c
) { 
8171     blockingPopGenericCommand(c
,REDIS_TAIL
); 
8174 /* =============================== Replication  ============================= */ 
8176 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8177     ssize_t nwritten
, ret 
= size
; 
8178     time_t start 
= time(NULL
); 
8182         if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) { 
8183             nwritten 
= write(fd
,ptr
,size
); 
8184             if (nwritten 
== -1) return -1; 
8188         if ((time(NULL
)-start
) > timeout
) { 
8196 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8197     ssize_t nread
, totread 
= 0; 
8198     time_t start 
= time(NULL
); 
8202         if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) { 
8203             nread 
= read(fd
,ptr
,size
); 
8204             if (nread 
== -1) return -1; 
8209         if ((time(NULL
)-start
) > timeout
) { 
8217 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) { 
8224         if (syncRead(fd
,&c
,1,timeout
) == -1) return -1; 
8227             if (nread 
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0'; 
8238 static void syncCommand(redisClient 
*c
) { 
8239     /* ignore SYNC if aleady slave or in monitor mode */ 
8240     if (c
->flags 
& REDIS_SLAVE
) return; 
8242     /* SYNC can't be issued when the server has pending data to send to 
8243      * the client about already issued commands. We need a fresh reply 
8244      * buffer registering the differences between the BGSAVE and the current 
8245      * dataset, so that we can copy to other slaves if needed. */ 
8246     if (listLength(c
->reply
) != 0) { 
8247         addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n")); 
8251     redisLog(REDIS_NOTICE
,"Slave ask for synchronization"); 
8252     /* Here we need to check if there is a background saving operation 
8253      * in progress, or if it is required to start one */ 
8254     if (server
.bgsavechildpid 
!= -1) { 
8255         /* Ok a background save is in progress. Let's check if it is a good 
8256          * one for replication, i.e. if there is another slave that is 
8257          * registering differences since the server forked to save */ 
8262         listRewind(server
.slaves
,&li
); 
8263         while((ln 
= listNext(&li
))) { 
8265             if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) break; 
8268             /* Perfect, the server is already registering differences for 
8269              * another slave. Set the right state, and copy the buffer. */ 
8270             listRelease(c
->reply
); 
8271             c
->reply 
= listDup(slave
->reply
); 
8272             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8273             redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC"); 
8275             /* No way, we need to wait for the next BGSAVE in order to 
8276              * register differences */ 
8277             c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8278             redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC"); 
8281         /* Ok we don't have a BGSAVE in progress, let's start one */ 
8282         redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC"); 
8283         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
8284             redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE"); 
8285             addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n")); 
8288         c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8291     c
->flags 
|= REDIS_SLAVE
; 
8293     listAddNodeTail(server
.slaves
,c
); 
8297 static void sendBulkToSlave(aeEventLoop 
*el
, int fd
, void *privdata
, int mask
) { 
8298     redisClient 
*slave 
= privdata
; 
8300     REDIS_NOTUSED(mask
); 
8301     char buf
[REDIS_IOBUF_LEN
]; 
8302     ssize_t nwritten
, buflen
; 
8304     if (slave
->repldboff 
== 0) { 
8305         /* Write the bulk write count before to transfer the DB. In theory here 
8306          * we don't know how much room there is in the output buffer of the 
8307          * socket, but in pratice SO_SNDLOWAT (the minimum count for output 
8308          * operations) will never be smaller than the few bytes we need. */ 
8311         bulkcount 
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long) 
8313         if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
)) 
8321     lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
); 
8322     buflen 
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
); 
8324         redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s", 
8325             (buflen 
== 0) ? "premature EOF" : strerror(errno
)); 
8329     if ((nwritten 
= write(fd
,buf
,buflen
)) == -1) { 
8330         redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s", 
8335     slave
->repldboff 
+= nwritten
; 
8336     if (slave
->repldboff 
== slave
->repldbsize
) { 
8337         close(slave
->repldbfd
); 
8338         slave
->repldbfd 
= -1; 
8339         aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
8340         slave
->replstate 
= REDIS_REPL_ONLINE
; 
8341         if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, 
8342             sendReplyToClient
, slave
) == AE_ERR
) { 
8346         addReplySds(slave
,sdsempty()); 
8347         redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded"); 
8351 /* This function is called at the end of every backgrond saving. 
8352  * The argument bgsaveerr is REDIS_OK if the background saving succeeded 
8353  * otherwise REDIS_ERR is passed to the function. 
8355  * The goal of this function is to handle slaves waiting for a successful 
8356  * background saving in order to perform non-blocking synchronization. */ 
8357 static void updateSlavesWaitingBgsave(int bgsaveerr
) { 
8359     int startbgsave 
= 0; 
8362     listRewind(server
.slaves
,&li
); 
8363     while((ln 
= listNext(&li
))) { 
8364         redisClient 
*slave 
= ln
->value
; 
8366         if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) { 
8368             slave
->replstate 
= REDIS_REPL_WAIT_BGSAVE_END
; 
8369         } else if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_END
) { 
8370             struct redis_stat buf
; 
8372             if (bgsaveerr 
!= REDIS_OK
) { 
8374                 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error"); 
8377             if ((slave
->repldbfd 
= open(server
.dbfilename
,O_RDONLY
)) == -1 || 
8378                 redis_fstat(slave
->repldbfd
,&buf
) == -1) { 
8380                 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
)); 
8383             slave
->repldboff 
= 0; 
8384             slave
->repldbsize 
= buf
.st_size
; 
8385             slave
->replstate 
= REDIS_REPL_SEND_BULK
; 
8386             aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
); 
8387             if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) { 
8394         if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) { 
8397             listRewind(server
.slaves
,&li
); 
8398             redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed"); 
8399             while((ln 
= listNext(&li
))) { 
8400                 redisClient 
*slave 
= ln
->value
; 
8402                 if (slave
->replstate 
== REDIS_REPL_WAIT_BGSAVE_START
) 
8409 static int syncWithMaster(void) { 
8410     char buf
[1024], tmpfile
[256], authcmd
[1024]; 
8412     int fd 
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
); 
8413     int dfd
, maxtries 
= 5; 
8416         redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s", 
8421     /* AUTH with the master if required. */ 
8422     if(server
.masterauth
) { 
8423         snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
); 
8424         if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) { 
8426             redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s", 
8430         /* Read the AUTH result.  */ 
8431         if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
8433             redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s", 
8437         if (buf
[0] != '+') { 
8439             redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?"); 
8444     /* Issue the SYNC command */ 
8445     if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) { 
8447         redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s", 
8451     /* Read the bulk write count */ 
8452     if (syncReadLine(fd
,buf
,1024,3600) == -1) { 
8454         redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s", 
8458     if (buf
[0] != '$') { 
8460         redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?"); 
8463     dumpsize 
= strtol(buf
+1,NULL
,10); 
8464     redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
); 
8465     /* Read the bulk write data on a temp file */ 
8467         snprintf(tmpfile
,256, 
8468             "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid()); 
8469         dfd 
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644); 
8470         if (dfd 
!= -1) break; 
8475         redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
)); 
8479         int nread
, nwritten
; 
8481         nread 
= read(fd
,buf
,(dumpsize 
< 1024)?dumpsize
:1024); 
8483             redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s", 
8489         nwritten 
= write(dfd
,buf
,nread
); 
8490         if (nwritten 
== -1) { 
8491             redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
)); 
8499     if (rename(tmpfile
,server
.dbfilename
) == -1) { 
8500         redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
)); 
8506     if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
8507         redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk"); 
8511     server
.master 
= createClient(fd
); 
8512     server
.master
->flags 
|= REDIS_MASTER
; 
8513     server
.master
->authenticated 
= 1; 
8514     server
.replstate 
= REDIS_REPL_CONNECTED
; 
8518 static void slaveofCommand(redisClient 
*c
) { 
8519     if (!strcasecmp(c
->argv
[1]->ptr
,"no") && 
8520         !strcasecmp(c
->argv
[2]->ptr
,"one")) { 
8521         if (server
.masterhost
) { 
8522             sdsfree(server
.masterhost
); 
8523             server
.masterhost 
= NULL
; 
8524             if (server
.master
) freeClient(server
.master
); 
8525             server
.replstate 
= REDIS_REPL_NONE
; 
8526             redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)"); 
8529         sdsfree(server
.masterhost
); 
8530         server
.masterhost 
= sdsdup(c
->argv
[1]->ptr
); 
8531         server
.masterport 
= atoi(c
->argv
[2]->ptr
); 
8532         if (server
.master
) freeClient(server
.master
); 
8533         server
.replstate 
= REDIS_REPL_CONNECT
; 
8534         redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)", 
8535             server
.masterhost
, server
.masterport
); 
8537     addReply(c
,shared
.ok
); 
8540 /* ============================ Maxmemory directive  ======================== */ 
8542 /* Try to free one object from the pre-allocated objects free list. 
8543  * This is useful under low mem conditions as by default we take 1 million 
8544  * free objects allocated. On success REDIS_OK is returned, otherwise 
8546 static int tryFreeOneObjectFromFreelist(void) { 
8549     if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
); 
8550     if (listLength(server
.objfreelist
)) { 
8551         listNode 
*head 
= listFirst(server
.objfreelist
); 
8552         o 
= listNodeValue(head
); 
8553         listDelNode(server
.objfreelist
,head
); 
8554         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8558         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
); 
8563 /* This function gets called when 'maxmemory' is set on the config file to limit 
8564  * the max memory used by the server, and we are out of memory. 
8565  * This function will try to, in order: 
8567  * - Free objects from the free list 
8568  * - Try to remove keys with an EXPIRE set 
8570  * If it is not possible to free enough memory to reach used-memory < maxmemory, 
8571  * the server will start refusing commands that will enlarge even more the 
8574 static void freeMemoryIfNeeded(void) { 
8575     while (server
.maxmemory 
&& zmalloc_used_memory() > server
.maxmemory
) { 
8576         int j
, k
, freed 
= 0; 
8578         if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue; 
8579         for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8581             robj 
*minkey 
= NULL
; 
8582             struct dictEntry 
*de
; 
8584             if (dictSize(server
.db
[j
].expires
)) { 
8586                 /* From a sample of three keys drop the one nearest to 
8587                  * the natural expire */ 
8588                 for (k 
= 0; k 
< 3; k
++) { 
8591                     de 
= dictGetRandomKey(server
.db
[j
].expires
); 
8592                     t 
= (time_t) dictGetEntryVal(de
); 
8593                     if (minttl 
== -1 || t 
< minttl
) { 
8594                         minkey 
= dictGetEntryKey(de
); 
8598                 dbDelete(server
.db
+j
,minkey
); 
8601         if (!freed
) return; /* nothing to free... */ 
8605 /* ============================== Append Only file ========================== */ 
8607 /* Called when the user switches from "appendonly yes" to "appendonly no" 
8608  * at runtime using the CONFIG command. */ 
8609 static void stopAppendOnly(void) { 
8610     flushAppendOnlyFile(); 
8611     aof_fsync(server
.appendfd
); 
8612     close(server
.appendfd
); 
8614     server
.appendfd 
= -1; 
8615     server
.appendseldb 
= -1; 
8616     server
.appendonly 
= 0; 
8617     /* rewrite operation in progress? kill it, wait child exit */ 
8618     if (server
.bgsavechildpid 
!= -1) { 
8621         if (kill(server
.bgsavechildpid
,SIGKILL
) != -1) 
8622             wait3(&statloc
,0,NULL
); 
8623         /* reset the buffer accumulating changes while the child saves */ 
8624         sdsfree(server
.bgrewritebuf
); 
8625         server
.bgrewritebuf 
= sdsempty(); 
8626         server
.bgsavechildpid 
= -1; 
8630 /* Called when the user switches from "appendonly no" to "appendonly yes" 
8631  * at runtime using the CONFIG command. */ 
8632 static int startAppendOnly(void) { 
8633     server
.appendonly 
= 1; 
8634     server
.lastfsync 
= time(NULL
); 
8635     server
.appendfd 
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644); 
8636     if (server
.appendfd 
== -1) { 
8637         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
)); 
8640     if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) { 
8641         server
.appendonly 
= 0; 
8642         close(server
.appendfd
); 
8643         redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
)); 
8649 /* Write the append only file buffer on disk. 
8651  * Since we are required to write the AOF before replying to the client, 
8652  * and the only way the client socket can get a write is when entering the 
8653  * event loop, we accumulate all the AOF writes in a memory 
8654  * buffer and write it on disk using this function just before entering 
8655  * the event loop again. */ 
8656 static void flushAppendOnlyFile(void) { 
8660     if (sdslen(server
.aofbuf
) == 0) return; 
8662     /* We want to perform a single write. This should be guaranteed atomic 
8663      * at least if the filesystem we are writing is a real physical one. 
8664      * While this will save us against the server being killed I don't think 
8665      * there is much to do about the whole server stopping for power problems 
8667      nwritten 
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
)); 
8668      if (nwritten 
!= (signed)sdslen(server
.aofbuf
)) { 
8669         /* Ooops, we are in troubles. The best thing to do for now is 
8670          * aborting instead of giving the illusion that everything is 
8671          * working as expected. */ 
8672          if (nwritten 
== -1) { 
8673             redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
)); 
8675             redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
)); 
8679     sdsfree(server
.aofbuf
); 
8680     server
.aofbuf 
= sdsempty(); 
8682     /* Don't fsync if no-appendfsync-on-rewrite is set to yes and we have 
8683      * children performing heavy I/O on disk. */ 
8684     if (server
.no_appendfsync_on_rewrite 
&& 
8685         (server
.bgrewritechildpid 
!= -1 || server
.bgsavechildpid 
!= -1)) 
8687     /* Fsync if needed */ 
8689     if (server
.appendfsync 
== APPENDFSYNC_ALWAYS 
|| 
8690         (server
.appendfsync 
== APPENDFSYNC_EVERYSEC 
&& 
8691          now
-server
.lastfsync 
> 1)) 
8693         /* aof_fsync is defined as fdatasync() for Linux in order to avoid 
8694          * flushing metadata. */ 
8695         aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */ 
8696         server
.lastfsync 
= now
; 
8700 static sds 
catAppendOnlyGenericCommand(sds buf
, int argc
, robj 
**argv
) { 
8702     buf 
= sdscatprintf(buf
,"*%d\r\n",argc
); 
8703     for (j 
= 0; j 
< argc
; j
++) { 
8704         robj 
*o 
= getDecodedObject(argv
[j
]); 
8705         buf 
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
)); 
8706         buf 
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
)); 
8707         buf 
= sdscatlen(buf
,"\r\n",2); 
8713 static sds 
catAppendOnlyExpireAtCommand(sds buf
, robj 
*key
, robj 
*seconds
) { 
8718     /* Make sure we can use strtol */ 
8719     seconds 
= getDecodedObject(seconds
); 
8720     when 
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10); 
8721     decrRefCount(seconds
); 
8723     argv
[0] = createStringObject("EXPIREAT",8); 
8725     argv
[2] = createObject(REDIS_STRING
, 
8726         sdscatprintf(sdsempty(),"%ld",when
)); 
8727     buf 
= catAppendOnlyGenericCommand(buf
, argc
, argv
); 
8728     decrRefCount(argv
[0]); 
8729     decrRefCount(argv
[2]); 
8733 static void feedAppendOnlyFile(struct redisCommand 
*cmd
, int dictid
, robj 
**argv
, int argc
) { 
8734     sds buf 
= sdsempty(); 
8737     /* The DB this command was targeting is not the same as the last command 
8738      * we appended, so a SELECT command needs to be issued first. */ 
8739     if (dictid 
!= server
.appendseldb
) { 
8742         snprintf(seldb
,sizeof(seldb
),"%d",dictid
); 
8743         buf 
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", 
8744             (unsigned long)strlen(seldb
),seldb
); 
8745         server
.appendseldb 
= dictid
; 
8748     if (cmd
->proc 
== expireCommand
) { 
8749         /* Translate EXPIRE into EXPIREAT */ 
8750         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8751     } else if (cmd
->proc 
== setexCommand
) { 
8752         /* Translate SETEX to SET and EXPIREAT */ 
8753         tmpargv
[0] = createStringObject("SET",3); 
8754         tmpargv
[1] = argv
[1]; 
8755         tmpargv
[2] = argv
[3]; 
8756         buf 
= catAppendOnlyGenericCommand(buf
,3,tmpargv
); 
8757         decrRefCount(tmpargv
[0]); 
8758         buf 
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]); 
8760         buf 
= catAppendOnlyGenericCommand(buf
,argc
,argv
); 
8763     /* Append to the AOF buffer. This will be flushed to disk just before 
8764      * re-entering the event loop, so before the client will get a 
8765      * positive reply about the operation performed. */ 
8766     server
.aofbuf 
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
)); 
8768     /* If a background append only file rewriting is in progress we want to 
8769      * accumulate the differences between the child DB and the current one 
8770      * in a buffer, so that when the child process will do its work we 
8771      * can append the differences to the new append only file. */ 
8772     if (server
.bgrewritechildpid 
!= -1) 
8773         server
.bgrewritebuf 
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
)); 
8778 /* In Redis commands are always executed in the context of a client, so in 
8779  * order to load the append only file we need to create a fake client. */ 
8780 static struct redisClient 
*createFakeClient(void) { 
8781     struct redisClient 
*c 
= zmalloc(sizeof(*c
)); 
8785     c
->querybuf 
= sdsempty(); 
8789     /* We set the fake client as a slave waiting for the synchronization 
8790      * so that Redis will not try to send replies to this client. */ 
8791     c
->replstate 
= REDIS_REPL_WAIT_BGSAVE_START
; 
8792     c
->reply 
= listCreate(); 
8793     listSetFreeMethod(c
->reply
,decrRefCount
); 
8794     listSetDupMethod(c
->reply
,dupClientReplyValue
); 
8795     initClientMultiState(c
); 
8799 static void freeFakeClient(struct redisClient 
*c
) { 
8800     sdsfree(c
->querybuf
); 
8801     listRelease(c
->reply
); 
8802     freeClientMultiState(c
); 
8806 /* Replay the append log file. On success REDIS_OK is returned. On non fatal 
8807  * error (the append only file is zero-length) REDIS_ERR is returned. On 
8808  * fatal error an error message is logged and the program exits. */ 
8809 int loadAppendOnlyFile(char *filename
) { 
8810     struct redisClient 
*fakeClient
; 
8811     FILE *fp 
= fopen(filename
,"r"); 
8812     struct redis_stat sb
; 
8813     int appendonly 
= server
.appendonly
; 
8815     if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size 
== 0) 
8819         redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
)); 
8823     /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI 
8824      * to the same file we're about to read. */ 
8825     server
.appendonly 
= 0; 
8827     fakeClient 
= createFakeClient(); 
8834         struct redisCommand 
*cmd
; 
8837         if (fgets(buf
,sizeof(buf
),fp
) == NULL
) { 
8843         if (buf
[0] != '*') goto fmterr
; 
8845         argv 
= zmalloc(sizeof(robj
*)*argc
); 
8846         for (j 
= 0; j 
< argc
; j
++) { 
8847             if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
; 
8848             if (buf
[0] != '$') goto fmterr
; 
8849             len 
= strtol(buf
+1,NULL
,10); 
8850             argsds 
= sdsnewlen(NULL
,len
); 
8851             if (len 
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
; 
8852             argv
[j
] = createObject(REDIS_STRING
,argsds
); 
8853             if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */ 
8856         /* Command lookup */ 
8857         cmd 
= lookupCommand(argv
[0]->ptr
); 
8859             redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
); 
8862         /* Try object encoding */ 
8863         if (cmd
->flags 
& REDIS_CMD_BULK
) 
8864             argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]); 
8865         /* Run the command in the context of a fake client */ 
8866         fakeClient
->argc 
= argc
; 
8867         fakeClient
->argv 
= argv
; 
8868         cmd
->proc(fakeClient
); 
8869         /* Discard the reply objects list from the fake client */ 
8870         while(listLength(fakeClient
->reply
)) 
8871             listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
)); 
8872         /* Clean up, ready for the next command */ 
8873         for (j 
= 0; j 
< argc
; j
++) decrRefCount(argv
[j
]); 
8875         /* Handle swapping while loading big datasets when VM is on */ 
8877         if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32) 
8880         if (server
.vm_enabled 
&& force_swapout
) { 
8881             while (zmalloc_used_memory() > server
.vm_max_memory
) { 
8882                 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break; 
8887     /* This point can only be reached when EOF is reached without errors. 
8888      * If the client is in the middle of a MULTI/EXEC, log error and quit. */ 
8889     if (fakeClient
->flags 
& REDIS_MULTI
) goto readerr
; 
8892     freeFakeClient(fakeClient
); 
8893     server
.appendonly 
= appendonly
; 
8898         redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file"); 
8900         redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
)); 
8904     redisLog(REDIS_WARNING
,"Bad file format reading the append only file"); 
8908 /* Write a binary-safe string into a file in the bulk format 
8909  * $<count>\r\n<payload>\r\n */ 
8910 static int fwriteBulkString(FILE *fp
, char *s
, unsigned long len
) { 
8914     clen 
= 1+ll2string(cbuf
+1,sizeof(cbuf
)-1,len
); 
8915     cbuf
[clen
++] = '\r'; 
8916     cbuf
[clen
++] = '\n'; 
8917     if (fwrite(cbuf
,clen
,1,fp
) == 0) return 0; 
8918     if (len 
> 0 && fwrite(s
,len
,1,fp
) == 0) return 0; 
8919     if (fwrite("\r\n",2,1,fp
) == 0) return 0; 
8923 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */ 
8924 static int fwriteBulkDouble(FILE *fp
, double d
) { 
8925     char buf
[128], dbuf
[128]; 
8927     snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
); 
8928     snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2); 
8929     if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0; 
8930     if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0; 
8934 /* Write a long long value in bulk format $<count>\r\n<payload>\r\n */ 
8935 static int fwriteBulkLongLong(FILE *fp
, long long l
) { 
8936     char bbuf
[128], lbuf
[128]; 
8937     unsigned int blen
, llen
; 
8938     llen 
= ll2string(lbuf
,32,l
); 
8939     blen 
= snprintf(bbuf
,sizeof(bbuf
),"$%u\r\n%s\r\n",llen
,lbuf
); 
8940     if (fwrite(bbuf
,blen
,1,fp
) == 0) return 0; 
8944 /* Delegate writing an object to writing a bulk string or bulk long long. */ 
8945 static int fwriteBulkObject(FILE *fp
, robj 
*obj
) { 
8946     /* Avoid using getDecodedObject to help copy-on-write (we are often 
8947      * in a child process when this function is called). */ 
8948     if (obj
->encoding 
== REDIS_ENCODING_INT
) { 
8949         return fwriteBulkLongLong(fp
,(long)obj
->ptr
); 
8950     } else if (obj
->encoding 
== REDIS_ENCODING_RAW
) { 
8951         return fwriteBulkString(fp
,obj
->ptr
,sdslen(obj
->ptr
)); 
8953         redisPanic("Unknown string encoding"); 
8957 /* Write a sequence of commands able to fully rebuild the dataset into 
8958  * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */ 
8959 static int rewriteAppendOnlyFile(char *filename
) { 
8960     dictIterator 
*di 
= NULL
; 
8965     time_t now 
= time(NULL
); 
8967     /* Note that we have to use a different temp name here compared to the 
8968      * one used by rewriteAppendOnlyFileBackground() function. */ 
8969     snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid()); 
8970     fp 
= fopen(tmpfile
,"w"); 
8972         redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
)); 
8975     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
8976         char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n"; 
8977         redisDb 
*db 
= server
.db
+j
; 
8979         if (dictSize(d
) == 0) continue; 
8980         di 
= dictGetIterator(d
); 
8986         /* SELECT the new DB */ 
8987         if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
; 
8988         if (fwriteBulkLongLong(fp
,j
) == 0) goto werr
; 
8990         /* Iterate this DB writing every entry */ 
8991         while((de 
= dictNext(di
)) != NULL
) { 
8992             sds keystr 
= dictGetEntryKey(de
); 
8997             keystr 
= dictGetEntryKey(de
); 
8998             o 
= dictGetEntryVal(de
); 
8999             initStaticStringObject(key
,keystr
); 
9000             /* If the value for this key is swapped, load a preview in memory. 
9001              * We use a "swapped" flag to remember if we need to free the 
9002              * value object instead of just incrementing the ref count anyway 
9003              * in order to avoid copy-on-write of pages if we are forked() */ 
9004             if (!server
.vm_enabled 
|| o
->storage 
== REDIS_VM_MEMORY 
|| 
9005                 o
->storage 
== REDIS_VM_SWAPPING
) { 
9008                 o 
= vmPreviewObject(o
); 
9011             expiretime 
= getExpire(db
,&key
); 
9013             /* Save the key and associated value */ 
9014             if (o
->type 
== REDIS_STRING
) { 
9015                 /* Emit a SET command */ 
9016                 char cmd
[]="*3\r\n$3\r\nSET\r\n"; 
9017                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9019                 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9020                 if (fwriteBulkObject(fp
,o
) == 0) goto werr
; 
9021             } else if (o
->type 
== REDIS_LIST
) { 
9022                 /* Emit the RPUSHes needed to rebuild the list */ 
9023                 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n"; 
9024                 if (o
->encoding 
== REDIS_ENCODING_ZIPLIST
) { 
9025                     unsigned char *zl 
= o
->ptr
; 
9026                     unsigned char *p 
= ziplistIndex(zl
,0); 
9027                     unsigned char *vstr
; 
9031                     while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) { 
9032                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9033                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9035                             if (fwriteBulkString(fp
,(char*)vstr
,vlen
) == 0) 
9038                             if (fwriteBulkLongLong(fp
,vlong
) == 0) 
9041                         p 
= ziplistNext(zl
,p
); 
9043                 } else if (o
->encoding 
== REDIS_ENCODING_LIST
) { 
9044                     list 
*list 
= o
->ptr
; 
9048                     listRewind(list
,&li
); 
9049                     while((ln 
= listNext(&li
))) { 
9050                         robj 
*eleobj 
= listNodeValue(ln
); 
9052                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9053                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9054                         if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9057                     redisPanic("Unknown list encoding"); 
9059             } else if (o
->type 
== REDIS_SET
) { 
9060                 /* Emit the SADDs needed to rebuild the set */ 
9062                 dictIterator 
*di 
= dictGetIterator(set
); 
9065                 while((de 
= dictNext(di
)) != NULL
) { 
9066                     char cmd
[]="*3\r\n$4\r\nSADD\r\n"; 
9067                     robj 
*eleobj 
= dictGetEntryKey(de
); 
9069                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9070                     if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9071                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9073                 dictReleaseIterator(di
); 
9074             } else if (o
->type 
== REDIS_ZSET
) { 
9075                 /* Emit the ZADDs needed to rebuild the sorted set */ 
9077                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
9080                 while((de 
= dictNext(di
)) != NULL
) { 
9081                     char cmd
[]="*4\r\n$4\r\nZADD\r\n"; 
9082                     robj 
*eleobj 
= dictGetEntryKey(de
); 
9083                     double *score 
= dictGetEntryVal(de
); 
9085                     if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9086                     if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9087                     if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
; 
9088                     if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
; 
9090                 dictReleaseIterator(di
); 
9091             } else if (o
->type 
== REDIS_HASH
) { 
9092                 char cmd
[]="*4\r\n$4\r\nHSET\r\n"; 
9094                 /* Emit the HSETs needed to rebuild the hash */ 
9095                 if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
9096                     unsigned char *p 
= zipmapRewind(o
->ptr
); 
9097                     unsigned char *field
, *val
; 
9098                     unsigned int flen
, vlen
; 
9100                     while((p 
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) { 
9101                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9102                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9103                         if (fwriteBulkString(fp
,(char*)field
,flen
) == -1) 
9105                         if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1) 
9109                     dictIterator 
*di 
= dictGetIterator(o
->ptr
); 
9112                     while((de 
= dictNext(di
)) != NULL
) { 
9113                         robj 
*field 
= dictGetEntryKey(de
); 
9114                         robj 
*val 
= dictGetEntryVal(de
); 
9116                         if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9117                         if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9118                         if (fwriteBulkObject(fp
,field
) == -1) return -1; 
9119                         if (fwriteBulkObject(fp
,val
) == -1) return -1; 
9121                     dictReleaseIterator(di
); 
9124                 redisPanic("Unknown object type"); 
9126             /* Save the expire time */ 
9127             if (expiretime 
!= -1) { 
9128                 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n"; 
9129                 /* If this key is already expired skip it */ 
9130                 if (expiretime 
< now
) continue; 
9131                 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
; 
9132                 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
; 
9133                 if (fwriteBulkLongLong(fp
,expiretime
) == 0) goto werr
; 
9135             if (swapped
) decrRefCount(o
); 
9137         dictReleaseIterator(di
); 
9140     /* Make sure data will not remain on the OS's output buffers */ 
9142     aof_fsync(fileno(fp
)); 
9145     /* Use RENAME to make sure the DB file is changed atomically only 
9146      * if the generated DB file is ok. */ 
9147     if (rename(tmpfile
,filename
) == -1) { 
9148         redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
)); 
9152     redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed"); 
9158     redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
)); 
9159     if (di
) dictReleaseIterator(di
); 
9163 /* This is how rewriting of the append only file in background works: 
9165  * 1) The user calls BGREWRITEAOF 
9166  * 2) Redis calls this function, that forks(): 
9167  *    2a) the child rewrites the append only file in a temp file. 
9168  *    2b) the parent accumulates differences in server.bgrewritebuf. 
9169  * 3) When the child finishes '2a' it exits. 
9170  * 4) The parent will trap the exit code, if it's OK, will append the 
9171  *    data accumulated into server.bgrewritebuf into the temp file, and 
9172  *    finally will rename(2) the temp file to the actual file name. 
9173  *    Then the new file is reopened as the new append only file. Profit! 
9175 static int rewriteAppendOnlyFileBackground(void) { 
9178     if (server
.bgrewritechildpid 
!= -1) return REDIS_ERR
; 
9179     if (server
.vm_enabled
) waitEmptyIOJobsQueue(); 
9180     if ((childpid 
= fork()) == 0) { 
9184         if (server
.vm_enabled
) vmReopenSwapFile(); 
9186         snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); 
9187         if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) { 
9194         if (childpid 
== -1) { 
9195             redisLog(REDIS_WARNING
, 
9196                 "Can't rewrite append only file in background: fork: %s", 
9200         redisLog(REDIS_NOTICE
, 
9201             "Background append only file rewriting started by pid %d",childpid
); 
9202         server
.bgrewritechildpid 
= childpid
; 
9203         updateDictResizePolicy(); 
9204         /* We set appendseldb to -1 in order to force the next call to the 
9205          * feedAppendOnlyFile() to issue a SELECT command, so the differences 
9206          * accumulated by the parent into server.bgrewritebuf will start 
9207          * with a SELECT statement and it will be safe to merge. */ 
9208         server
.appendseldb 
= -1; 
9211     return REDIS_OK
; /* unreached */ 
9214 static void bgrewriteaofCommand(redisClient 
*c
) { 
9215     if (server
.bgrewritechildpid 
!= -1) { 
9216         addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n")); 
9219     if (rewriteAppendOnlyFileBackground() == REDIS_OK
) { 
9220         char *status 
= "+Background append only file rewriting started\r\n"; 
9221         addReplySds(c
,sdsnew(status
)); 
9223         addReply(c
,shared
.err
); 
9227 static void aofRemoveTempFile(pid_t childpid
) { 
9230     snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
); 
9234 /* Virtual Memory is composed mainly of two subsystems: 
9235  * - Blocking Virtual Memory 
9236  * - Threaded Virtual Memory I/O 
9237  * The two parts are not fully decoupled, but functions are split among two 
9238  * different sections of the source code (delimited by comments) in order to 
9239  * make clearer which functionality belongs to the blocking VM and which to 
9240  * the threaded (non-blocking) VM. 
9244  * Redis VM is a blocking VM (one that blocks reading swapped values from 
9245  * disk into memory when a value swapped out is needed in memory) that is made 
9246  * unblocking by trying to examine the command argument vector in order to 
9247  * load in background values that will likely be needed in order to exec 
9248  * the command. The command is executed only once all the relevant keys 
9249  * are loaded into memory. 
9251  * This basically is almost as simple as a blocking VM, but almost as parallel 
9252  * as a fully non-blocking VM. 
9255 /* =================== Virtual Memory - Blocking Side  ====================== */ 
9257 /* Create a VM pointer object. This kind of object is used in place of 
9258  * values in the key -> value hash table, for swapped out objects. */ 
9259 static vmpointer 
*createVmPointer(int vtype
) { 
9260     vmpointer 
*vp 
= zmalloc(sizeof(vmpointer
)); 
9262     vp
->type 
= REDIS_VMPOINTER
; 
9263     vp
->storage 
= REDIS_VM_SWAPPED
; 
9268 static void vmInit(void) { 
9274     if (server
.vm_max_threads 
!= 0) 
9275         zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */ 
9277     redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
); 
9278     /* Try to open the old swap file, otherwise create it */ 
9279     if ((server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) { 
9280         server
.vm_fp 
= fopen(server
.vm_swap_file
,"w+b"); 
9282     if (server
.vm_fp 
== NULL
) { 
9283         redisLog(REDIS_WARNING
, 
9284             "Can't open the swap file: %s. Exiting.", 
9288     server
.vm_fd 
= fileno(server
.vm_fp
); 
9289     /* Lock the swap file for writing, this is useful in order to avoid 
9290      * another instance to use the same swap file for a config error. */ 
9291     fl
.l_type 
= F_WRLCK
; 
9292     fl
.l_whence 
= SEEK_SET
; 
9293     fl
.l_start 
= fl
.l_len 
= 0; 
9294     if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) { 
9295         redisLog(REDIS_WARNING
, 
9296             "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
)); 
9300     server
.vm_next_page 
= 0; 
9301     server
.vm_near_pages 
= 0; 
9302     server
.vm_stats_used_pages 
= 0; 
9303     server
.vm_stats_swapped_objects 
= 0; 
9304     server
.vm_stats_swapouts 
= 0; 
9305     server
.vm_stats_swapins 
= 0; 
9306     totsize 
= server
.vm_pages
*server
.vm_page_size
; 
9307     redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
); 
9308     if (ftruncate(server
.vm_fd
,totsize
) == -1) { 
9309         redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.", 
9313         redisLog(REDIS_NOTICE
,"Swap file allocated with success"); 
9315     server
.vm_bitmap 
= zmalloc((server
.vm_pages
+7)/8); 
9316     redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages", 
9317         (long long) (server
.vm_pages
+7)/8, server
.vm_pages
); 
9318     memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8); 
9320     /* Initialize threaded I/O (used by Virtual Memory) */ 
9321     server
.io_newjobs 
= listCreate(); 
9322     server
.io_processing 
= listCreate(); 
9323     server
.io_processed 
= listCreate(); 
9324     server
.io_ready_clients 
= listCreate(); 
9325     pthread_mutex_init(&server
.io_mutex
,NULL
); 
9326     pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
); 
9327     pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
); 
9328     server
.io_active_threads 
= 0; 
9329     if (pipe(pipefds
) == -1) { 
9330         redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting." 
9334     server
.io_ready_pipe_read 
= pipefds
[0]; 
9335     server
.io_ready_pipe_write 
= pipefds
[1]; 
9336     redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
); 
9337     /* LZF requires a lot of stack */ 
9338     pthread_attr_init(&server
.io_threads_attr
); 
9339     pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
); 
9340     while (stacksize 
< REDIS_THREAD_STACK_SIZE
) stacksize 
*= 2; 
9341     pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
); 
9342     /* Listen for events in the threaded I/O pipe */ 
9343     if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
, 
9344         vmThreadedIOCompletedJob
, NULL
) == AE_ERR
) 
9345         oom("creating file event"); 
9348 /* Mark the page as used */ 
9349 static void vmMarkPageUsed(off_t page
) { 
9350     off_t byte 
= page
/8; 
9352     redisAssert(vmFreePage(page
) == 1); 
9353     server
.vm_bitmap
[byte
] |= 1<<bit
; 
9356 /* Mark N contiguous pages as used, with 'page' being the first. */ 
9357 static void vmMarkPagesUsed(off_t page
, off_t count
) { 
9360     for (j 
= 0; j 
< count
; j
++) 
9361         vmMarkPageUsed(page
+j
); 
9362     server
.vm_stats_used_pages 
+= count
; 
9363     redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n", 
9364         (long long)count
, (long long)page
); 
9367 /* Mark the page as free */ 
9368 static void vmMarkPageFree(off_t page
) { 
9369     off_t byte 
= page
/8; 
9371     redisAssert(vmFreePage(page
) == 0); 
9372     server
.vm_bitmap
[byte
] &= ~(1<<bit
); 
9375 /* Mark N contiguous pages as free, with 'page' being the first. */ 
9376 static void vmMarkPagesFree(off_t page
, off_t count
) { 
9379     for (j 
= 0; j 
< count
; j
++) 
9380         vmMarkPageFree(page
+j
); 
9381     server
.vm_stats_used_pages 
-= count
; 
9382     redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n", 
9383         (long long)count
, (long long)page
); 
9386 /* Test if the page is free */ 
9387 static int vmFreePage(off_t page
) { 
9388     off_t byte 
= page
/8; 
9390     return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0; 
9393 /* Find N contiguous free pages storing the first page of the cluster in *first. 
9394  * Returns REDIS_OK if it was able to find N contiguous pages, otherwise 
9395  * REDIS_ERR is returned. 
9397  * This function uses a simple algorithm: we try to allocate 
9398  * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start 
9399  * again from the start of the swap file searching for free spaces. 
9401  * If it looks pretty clear that there are no free pages near our offset 
9402  * we try to find less populated places doing a forward jump of 
9403  * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages 
9404  * without hurry, and then we jump again and so forth... 
9406  * This function can be improved using a free list to avoid to guess 
9407  * too much, since we could collect data about freed pages. 
9409  * note: I implemented this function just after watching an episode of 
9410  * Battlestar Galactica, where the hybrid was continuing to say "JUMP!" 
9412 static int vmFindContiguousPages(off_t 
*first
, off_t n
) { 
9413     off_t base
, offset 
= 0, since_jump 
= 0, numfree 
= 0; 
9415     if (server
.vm_near_pages 
== REDIS_VM_MAX_NEAR_PAGES
) { 
9416         server
.vm_near_pages 
= 0; 
9417         server
.vm_next_page 
= 0; 
9419     server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */ 
9420     base 
= server
.vm_next_page
; 
9422     while(offset 
< server
.vm_pages
) { 
9423         off_t 
this = base
+offset
; 
9425         /* If we overflow, restart from page zero */ 
9426         if (this >= server
.vm_pages
) { 
9427             this -= server
.vm_pages
; 
9429                 /* Just overflowed, what we found on tail is no longer 
9430                  * interesting, as it's no longer contiguous. */ 
9434         if (vmFreePage(this)) { 
9435             /* This is a free page */ 
9437             /* Already got N free pages? Return to the caller, with success */ 
9439                 *first 
= this-(n
-1); 
9440                 server
.vm_next_page 
= this+1; 
9441                 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
); 
9445             /* The current one is not a free page */ 
9449         /* Fast-forward if the current page is not free and we already 
9450          * searched enough near this place. */ 
9452         if (!numfree 
&& since_jump 
>= REDIS_VM_MAX_RANDOM_JUMP
/4) { 
9453             offset 
+= random() % REDIS_VM_MAX_RANDOM_JUMP
; 
9455             /* Note that even if we rewind after the jump, we are don't need 
9456              * to make sure numfree is set to zero as we only jump *if* it 
9457              * is set to zero. */ 
9459             /* Otherwise just check the next page */ 
9466 /* Write the specified object at the specified page of the swap file */ 
9467 static int vmWriteObjectOnSwap(robj 
*o
, off_t page
) { 
9468     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
9469     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
9470         if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9471         redisLog(REDIS_WARNING
, 
9472             "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s", 
9476     rdbSaveObject(server
.vm_fp
,o
); 
9477     fflush(server
.vm_fp
); 
9478     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9482 /* Transfers the 'val' object to disk. Store all the information 
9483  * a 'vmpointer' object containing all the information needed to load the 
9484  * object back later is returned. 
9486  * If we can't find enough contiguous empty pages to swap the object on disk 
9487  * NULL is returned. */ 
9488 static vmpointer 
*vmSwapObjectBlocking(robj 
*val
) { 
9489     off_t pages 
= rdbSavedObjectPages(val
,NULL
); 
9493     assert(val
->storage 
== REDIS_VM_MEMORY
); 
9494     assert(val
->refcount 
== 1); 
9495     if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
; 
9496     if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
; 
9498     vp 
= createVmPointer(val
->type
); 
9500     vp
->usedpages 
= pages
; 
9501     decrRefCount(val
); /* Deallocate the object from memory. */ 
9502     vmMarkPagesUsed(page
,pages
); 
9503     redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)", 
9505         (unsigned long long) page
, (unsigned long long) pages
); 
9506     server
.vm_stats_swapped_objects
++; 
9507     server
.vm_stats_swapouts
++; 
9511 static robj 
*vmReadObjectFromSwap(off_t page
, int type
) { 
9514     if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
); 
9515     if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) { 
9516         redisLog(REDIS_WARNING
, 
9517             "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s", 
9521     o 
= rdbLoadObject(type
,server
.vm_fp
); 
9523         redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
)); 
9526     if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
); 
9530 /* Load the specified object from swap to memory. 
9531  * The newly allocated object is returned. 
9533  * If preview is true the unserialized object is returned to the caller but 
9534  * the pages are not marked as freed, nor the vp object is freed. */ 
9535 static robj 
*vmGenericLoadObject(vmpointer 
*vp
, int preview
) { 
9538     redisAssert(vp
->type 
== REDIS_VMPOINTER 
&& 
9539         (vp
->storage 
== REDIS_VM_SWAPPED 
|| vp
->storage 
== REDIS_VM_LOADING
)); 
9540     val 
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
); 
9542         redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
); 
9543         vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
9545         server
.vm_stats_swapped_objects
--; 
9547         redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
); 
9549     server
.vm_stats_swapins
++; 
9553 /* Plain object loading, from swap to memory. 
9555  * 'o' is actually a redisVmPointer structure that will be freed by the call. 
9556  * The return value is the loaded object. */ 
9557 static robj 
*vmLoadObject(robj 
*o
) { 
9558     /* If we are loading the object in background, stop it, we 
9559      * need to load this object synchronously ASAP. */ 
9560     if (o
->storage 
== REDIS_VM_LOADING
) 
9561         vmCancelThreadedIOJob(o
); 
9562     return vmGenericLoadObject((vmpointer
*)o
,0); 
9565 /* Just load the value on disk, without to modify the key. 
9566  * This is useful when we want to perform some operation on the value 
9567  * without to really bring it from swap to memory, like while saving the 
9568  * dataset or rewriting the append only log. */ 
9569 static robj 
*vmPreviewObject(robj 
*o
) { 
9570     return vmGenericLoadObject((vmpointer
*)o
,1); 
9573 /* How a good candidate is this object for swapping? 
9574  * The better candidate it is, the greater the returned value. 
9576  * Currently we try to perform a fast estimation of the object size in 
9577  * memory, and combine it with aging informations. 
9579  * Basically swappability = idle-time * log(estimated size) 
9581  * Bigger objects are preferred over smaller objects, but not 
9582  * proportionally, this is why we use the logarithm. This algorithm is 
9583  * just a first try and will probably be tuned later. */ 
9584 static double computeObjectSwappability(robj 
*o
) { 
9585     /* actual age can be >= minage, but not < minage. As we use wrapping 
9586      * 21 bit clocks with minutes resolution for the LRU. */ 
9587     time_t minage 
= abs(server
.lruclock 
- o
->lru
); 
9591     struct dictEntry 
*de
; 
9594     if (minage 
<= 0) return 0; 
9597         if (o
->encoding 
!= REDIS_ENCODING_RAW
) { 
9600             asize 
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2; 
9605         listNode 
*ln 
= listFirst(l
); 
9607         asize 
= sizeof(list
); 
9609             robj 
*ele 
= ln
->value
; 
9612             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9613                             (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9614             asize 
+= (sizeof(listNode
)+elesize
)*listLength(l
); 
9619         z 
= (o
->type 
== REDIS_ZSET
); 
9620         d 
= z 
? ((zset
*)o
->ptr
)->dict 
: o
->ptr
; 
9622         asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9623         if (z
) asize 
+= sizeof(zset
)-sizeof(dict
); 
9628             de 
= dictGetRandomKey(d
); 
9629             ele 
= dictGetEntryKey(de
); 
9630             elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9631                             (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9632             asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9633             if (z
) asize 
+= sizeof(zskiplistNode
)*dictSize(d
); 
9637         if (o
->encoding 
== REDIS_ENCODING_ZIPMAP
) { 
9638             unsigned char *p 
= zipmapRewind((unsigned char*)o
->ptr
); 
9639             unsigned int len 
= zipmapLen((unsigned char*)o
->ptr
); 
9640             unsigned int klen
, vlen
; 
9641             unsigned char *key
, *val
; 
9643             if ((p 
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) { 
9647             asize 
= len
*(klen
+vlen
+3); 
9648         } else if (o
->encoding 
== REDIS_ENCODING_HT
) { 
9650             asize 
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
)); 
9655                 de 
= dictGetRandomKey(d
); 
9656                 ele 
= dictGetEntryKey(de
); 
9657                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9658                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9659                 ele 
= dictGetEntryVal(de
); 
9660                 elesize 
= (ele
->encoding 
== REDIS_ENCODING_RAW
) ? 
9661                                 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
); 
9662                 asize 
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
); 
9667     return (double)minage
*log(1+asize
); 
9670 /* Try to swap an object that's a good candidate for swapping. 
9671  * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible 
9672  * to swap any object at all. 
9674  * If 'usethreaded' is true, Redis will try to swap the object in background 
9675  * using I/O threads. */ 
9676 static int vmSwapOneObject(int usethreads
) { 
9678     struct dictEntry 
*best 
= NULL
; 
9679     double best_swappability 
= 0; 
9680     redisDb 
*best_db 
= NULL
; 
9684     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
9685         redisDb 
*db 
= server
.db
+j
; 
9686         /* Why maxtries is set to 100? 
9687          * Because this way (usually) we'll find 1 object even if just 1% - 2% 
9688          * are swappable objects */ 
9691         if (dictSize(db
->dict
) == 0) continue; 
9692         for (i 
= 0; i 
< 5; i
++) { 
9694             double swappability
; 
9696             if (maxtries
) maxtries
--; 
9697             de 
= dictGetRandomKey(db
->dict
); 
9698             val 
= dictGetEntryVal(de
); 
9699             /* Only swap objects that are currently in memory. 
9701              * Also don't swap shared objects: not a good idea in general and 
9702              * we need to ensure that the main thread does not touch the 
9703              * object while the I/O thread is using it, but we can't 
9704              * control other keys without adding additional mutex. */ 
9705             if (val
->storage 
!= REDIS_VM_MEMORY 
|| val
->refcount 
!= 1) { 
9706                 if (maxtries
) i
--; /* don't count this try */ 
9709             swappability 
= computeObjectSwappability(val
); 
9710             if (!best 
|| swappability 
> best_swappability
) { 
9712                 best_swappability 
= swappability
; 
9717     if (best 
== NULL
) return REDIS_ERR
; 
9718     key 
= dictGetEntryKey(best
); 
9719     val 
= dictGetEntryVal(best
); 
9721     redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f", 
9722         key
, best_swappability
); 
9726         robj 
*keyobj 
= createStringObject(key
,sdslen(key
)); 
9727         vmSwapObjectThreaded(keyobj
,val
,best_db
); 
9728         decrRefCount(keyobj
); 
9733         if ((vp 
= vmSwapObjectBlocking(val
)) != NULL
) { 
9734             dictGetEntryVal(best
) = vp
; 
/* Swap one object out synchronously. Thin wrapper around
 * vmSwapOneObject(0); returns its REDIS_OK / REDIS_ERR result. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads. Thin wrapper around
 * vmSwapOneObject(1); returns its REDIS_OK / REDIS_ERR result. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9750 /* Return true if it's safe to swap out objects in a given moment. 
9751  * Basically we don't want to swap objects out while there is a BGSAVE 
9752  * or a BGAEOREWRITE running in backgroud. */ 
9753 static int vmCanSwapOut(void) { 
9754     return (server
.bgsavechildpid 
== -1 && server
.bgrewritechildpid 
== -1); 
9757 /* =================== Virtual Memory - Threaded I/O  ======================= */ 
9759 static void freeIOJob(iojob 
*j
) { 
9760     if ((j
->type 
== REDIS_IOJOB_PREPARE_SWAP 
|| 
9761         j
->type 
== REDIS_IOJOB_DO_SWAP 
|| 
9762         j
->type 
== REDIS_IOJOB_LOAD
) && j
->val 
!= NULL
) 
9764          /* we fix the storage type, otherwise decrRefCount() will try to 
9765           * kill the I/O thread Job (that does no longer exists). */ 
9766         if (j
->val
->storage 
== REDIS_VM_SWAPPING
) 
9767             j
->val
->storage 
= REDIS_VM_MEMORY
; 
9768         decrRefCount(j
->val
); 
9770     decrRefCount(j
->key
); 
9774 /* Every time a thread finished a Job, it writes a byte into the write side 
9775  * of an unix pipe in order to "awake" the main thread, and this function 
9777 static void vmThreadedIOCompletedJob(aeEventLoop 
*el
, int fd
, void *privdata
, 
9781     int retval
, processed 
= 0, toprocess 
= -1, trytoswap 
= 1; 
9783     REDIS_NOTUSED(mask
); 
9784     REDIS_NOTUSED(privdata
); 
9786     /* For every byte we read in the read side of the pipe, there is one 
9787      * I/O job completed to process. */ 
9788     while((retval 
= read(fd
,buf
,1)) == 1) { 
9791         struct dictEntry 
*de
; 
9793         redisLog(REDIS_DEBUG
,"Processing I/O completed job"); 
9795         /* Get the processed element (the oldest one) */ 
9797         assert(listLength(server
.io_processed
) != 0); 
9798         if (toprocess 
== -1) { 
9799             toprocess 
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100; 
9800             if (toprocess 
<= 0) toprocess 
= 1; 
9802         ln 
= listFirst(server
.io_processed
); 
9804         listDelNode(server
.io_processed
,ln
); 
9806         /* If this job is marked as canceled, just ignore it */ 
9811         /* Post process it in the main thread, as there are things we 
9812          * can do just here to avoid race conditions and/or invasive locks */ 
9813         redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
); 
9814         de 
= dictFind(j
->db
->dict
,j
->key
->ptr
); 
9815         redisAssert(de 
!= NULL
); 
9816         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
9818             vmpointer 
*vp 
= dictGetEntryVal(de
); 
9820             /* Key loaded, bring it at home */ 
9821             vmMarkPagesFree(vp
->page
,vp
->usedpages
); 
9822             redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)", 
9823                 (unsigned char*) j
->key
->ptr
); 
9824             server
.vm_stats_swapped_objects
--; 
9825             server
.vm_stats_swapins
++; 
9826             dictGetEntryVal(de
) = j
->val
; 
9827             incrRefCount(j
->val
); 
9829             /* Handle clients waiting for this key to be loaded. */ 
9830             handleClientsBlockedOnSwappedKey(db
,j
->key
); 
9833         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
9834             /* Now we know the amount of pages required to swap this object. 
9835              * Let's find some space for it, and queue this task again 
9836              * rebranded as REDIS_IOJOB_DO_SWAP. */ 
9837             if (!vmCanSwapOut() || 
9838                 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
) 
9840                 /* Ooops... no space or we can't swap as there is 
9841                  * a fork()ed Redis trying to save stuff on disk. */ 
9842                 j
->val
->storage 
= REDIS_VM_MEMORY
; /* undo operation */ 
9845                 /* Note that we need to mark this pages as used now, 
9846                  * if the job will be canceled, we'll mark them as freed 
9848                 vmMarkPagesUsed(j
->page
,j
->pages
); 
9849                 j
->type 
= REDIS_IOJOB_DO_SWAP
; 
9854         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
9857             /* Key swapped. We can finally free some memory. */ 
9858             if (j
->val
->storage 
!= REDIS_VM_SWAPPING
) { 
9859                 vmpointer 
*vp 
= (vmpointer
*) j
->id
; 
9860                 printf("storage: %d\n",vp
->storage
); 
9861                 printf("key->name: %s\n",(char*)j
->key
->ptr
); 
9862                 printf("val: %p\n",(void*)j
->val
); 
9863                 printf("val->type: %d\n",j
->val
->type
); 
9864                 printf("val->ptr: %s\n",(char*)j
->val
->ptr
); 
9866             redisAssert(j
->val
->storage 
== REDIS_VM_SWAPPING
); 
9867             vp 
= createVmPointer(j
->val
->type
); 
9869             vp
->usedpages 
= j
->pages
; 
9870             dictGetEntryVal(de
) = vp
; 
9871             /* Fix the storage otherwise decrRefCount will attempt to 
9872              * remove the associated I/O job */ 
9873             j
->val
->storage 
= REDIS_VM_MEMORY
; 
9874             decrRefCount(j
->val
); 
9875             redisLog(REDIS_DEBUG
, 
9876                 "VM: object %s swapped out at %lld (%lld pages) (threaded)", 
9877                 (unsigned char*) j
->key
->ptr
, 
9878                 (unsigned long long) j
->page
, (unsigned long long) j
->pages
); 
9879             server
.vm_stats_swapped_objects
++; 
9880             server
.vm_stats_swapouts
++; 
9882             /* Put a few more swap requests in queue if we are still 
9884             if (trytoswap 
&& vmCanSwapOut() && 
9885                 zmalloc_used_memory() > server
.vm_max_memory
) 
9890                     more 
= listLength(server
.io_newjobs
) < 
9891                             (unsigned) server
.vm_max_threads
; 
9893                     /* Don't waste CPU time if swappable objects are rare. */ 
9894                     if (vmSwapOneObjectThreaded() == REDIS_ERR
) { 
9902         if (processed 
== toprocess
) return; 
9904     if (retval 
< 0 && errno 
!= EAGAIN
) { 
9905         redisLog(REDIS_WARNING
, 
9906             "WARNING: read(2) error in vmThreadedIOCompletedJob() %s", 
9911 static void lockThreadedIO(void) { 
9912     pthread_mutex_lock(&server
.io_mutex
); 
9915 static void unlockThreadedIO(void) { 
9916     pthread_mutex_unlock(&server
.io_mutex
); 
9919 /* Remove the specified object from the threaded I/O queue if still not 
9920  * processed, otherwise make sure to flag it as canceled. */ 
9921 static void vmCancelThreadedIOJob(robj 
*o
) { 
9923         server
.io_newjobs
,      /* 0 */ 
9924         server
.io_processing
,   /* 1 */ 
9925         server
.io_processed     
/* 2 */ 
9929     assert(o
->storage 
== REDIS_VM_LOADING 
|| o
->storage 
== REDIS_VM_SWAPPING
); 
9932     /* Search for a matching object in one of the queues */ 
9933     for (i 
= 0; i 
< 3; i
++) { 
9937         listRewind(lists
[i
],&li
); 
9938         while ((ln 
= listNext(&li
)) != NULL
) { 
9939             iojob 
*job 
= ln
->value
; 
9941             if (job
->canceled
) continue; /* Skip this, already canceled. */ 
9943                 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n", 
9944                     (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
); 
9945                 /* Mark the pages as free since the swap didn't happened 
9946                  * or happened but is now discarded. */ 
9947                 if (i 
!= 1 && job
->type 
== REDIS_IOJOB_DO_SWAP
) 
9948                     vmMarkPagesFree(job
->page
,job
->pages
); 
9949                 /* Cancel the job. It depends on the list the job is 
9952                 case 0: /* io_newjobs */ 
9953                     /* If the job was yet not processed the best thing to do 
9954                      * is to remove it from the queue at all */ 
9956                     listDelNode(lists
[i
],ln
); 
9958                 case 1: /* io_processing */ 
9959                     /* Oh Shi- the thread is messing with the Job: 
9961                      * Probably it's accessing the object if this is a 
9962                      * PREPARE_SWAP or DO_SWAP job. 
9963                      * If it's a LOAD job it may be reading from disk and 
9964                      * if we don't wait for the job to terminate before to 
9965                      * cancel it, maybe in a few microseconds data can be 
9966                      * corrupted in this pages. So the short story is: 
9968                      * Better to wait for the job to move into the 
9969                      * next queue (processed)... */ 
9971                     /* We try again and again until the job is completed. */ 
9973                     /* But let's wait some time for the I/O thread 
9974                      * to finish with this job. After all this condition 
9975                      * should be very rare. */ 
9978                 case 2: /* io_processed */ 
9979                     /* The job was already processed, that's easy... 
9980                      * just mark it as canceled so that we'll ignore it 
9981                      * when processing completed jobs. */ 
9985                 /* Finally we have to adjust the storage type of the object 
9986                  * in order to "UNDO" the operaiton. */ 
9987                 if (o
->storage 
== REDIS_VM_LOADING
) 
9988                     o
->storage 
= REDIS_VM_SWAPPED
; 
9989                 else if (o
->storage 
== REDIS_VM_SWAPPING
) 
9990                     o
->storage 
= REDIS_VM_MEMORY
; 
9992                 redisLog(REDIS_DEBUG
,"*** DONE"); 
9998     printf("Not found: %p\n", (void*)o
); 
9999     redisAssert(1 != 1); /* We should never reach this */ 
10002 static void *IOThreadEntryPoint(void *arg
) { 
10005     REDIS_NOTUSED(arg
); 
10007     pthread_detach(pthread_self()); 
10009         /* Get a new job to process */ 
10011         if (listLength(server
.io_newjobs
) == 0) { 
10012             /* No new jobs in queue, exit. */ 
10013             redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do", 
10014                 (long) pthread_self()); 
10015             server
.io_active_threads
--; 
10016             unlockThreadedIO(); 
10019         ln 
= listFirst(server
.io_newjobs
); 
10021         listDelNode(server
.io_newjobs
,ln
); 
10022         /* Add the job in the processing queue */ 
10023         j
->thread 
= pthread_self(); 
10024         listAddNodeTail(server
.io_processing
,j
); 
10025         ln 
= listLast(server
.io_processing
); /* We use ln later to remove it */ 
10026         unlockThreadedIO(); 
10027         redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'", 
10028             (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
); 
10030         /* Process the Job */ 
10031         if (j
->type 
== REDIS_IOJOB_LOAD
) { 
10032             vmpointer 
*vp 
= (vmpointer
*)j
->id
; 
10033             j
->val 
= vmReadObjectFromSwap(j
->page
,vp
->vtype
); 
10034         } else if (j
->type 
== REDIS_IOJOB_PREPARE_SWAP
) { 
10035             FILE *fp 
= fopen("/dev/null","w+"); 
10036             j
->pages 
= rdbSavedObjectPages(j
->val
,fp
); 
10038         } else if (j
->type 
== REDIS_IOJOB_DO_SWAP
) { 
10039             if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
) 
10043         /* Done: insert the job into the processed queue */ 
10044         redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)", 
10045             (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
); 
10047         listDelNode(server
.io_processing
,ln
); 
10048         listAddNodeTail(server
.io_processed
,j
); 
10049         unlockThreadedIO(); 
10051         /* Signal the main thread there is new stuff to process */ 
10052         assert(write(server
.io_ready_pipe_write
,"x",1) == 1); 
10054     return NULL
; /* never reached */ 
10057 static void spawnIOThread(void) { 
10059     sigset_t mask
, omask
; 
10062     sigemptyset(&mask
); 
10063     sigaddset(&mask
,SIGCHLD
); 
10064     sigaddset(&mask
,SIGHUP
); 
10065     sigaddset(&mask
,SIGPIPE
); 
10066     pthread_sigmask(SIG_SETMASK
, &mask
, &omask
); 
10067     while ((err 
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) { 
10068         redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s", 
10072     pthread_sigmask(SIG_SETMASK
, &omask
, NULL
); 
10073     server
.io_active_threads
++; 
10076 /* We need to wait for the last thread to exit before we are able to 
10077  * fork() in order to BGSAVE or BGREWRITEAOF. */ 
10078 static void waitEmptyIOJobsQueue(void) { 
10080         int io_processed_len
; 
10083         if (listLength(server
.io_newjobs
) == 0 && 
10084             listLength(server
.io_processing
) == 0 && 
10085             server
.io_active_threads 
== 0) 
10087             unlockThreadedIO(); 
10090         /* While waiting for empty jobs queue condition we post-process some 
10091          * finshed job, as I/O threads may be hanging trying to write against 
10092          * the io_ready_pipe_write FD but there are so much pending jobs that 
10093          * it's blocking. */ 
10094         io_processed_len 
= listLength(server
.io_processed
); 
10095         unlockThreadedIO(); 
10096         if (io_processed_len
) { 
10097             vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0); 
10098             usleep(1000); /* 1 millisecond */ 
10100             usleep(10000); /* 10 milliseconds */ 
10105 static void vmReopenSwapFile(void) { 
10106     /* Note: we don't close the old one as we are in the child process 
10107      * and don't want to mess at all with the original file object. */ 
10108     server
.vm_fp 
= fopen(server
.vm_swap_file
,"r+b"); 
10109     if (server
.vm_fp 
== NULL
) { 
10110         redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.", 
10111             server
.vm_swap_file
); 
10114     server
.vm_fd 
= fileno(server
.vm_fp
); 
10117 /* This function must be called while with threaded IO locked */ 
10118 static void queueIOJob(iojob 
*j
) { 
10119     redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n", 
10120         (void*)j
, j
->type
, (char*)j
->key
->ptr
); 
10121     listAddNodeTail(server
.io_newjobs
,j
); 
10122     if (server
.io_active_threads 
< server
.vm_max_threads
) 
10126 static int vmSwapObjectThreaded(robj 
*key
, robj 
*val
, redisDb 
*db
) { 
10129     j 
= zmalloc(sizeof(*j
)); 
10130     j
->type 
= REDIS_IOJOB_PREPARE_SWAP
; 
10134     j
->id 
= j
->val 
= val
; 
10137     j
->thread 
= (pthread_t
) -1; 
10138     val
->storage 
= REDIS_VM_SWAPPING
; 
10142     unlockThreadedIO(); 
10146 /* ============ Virtual Memory - Blocking clients on missing keys =========== */ 
10148 /* This function makes the client 'c' wait for the key 'key' to be loaded. 
10149  * If there is not already a job loading the key, it is created. 
10150  * The key is added to the io_keys list in the client structure, and also 
10151  * in the hash table mapping swapped keys to waiting clients, that is, 
10152  * the io_keys dictionary of the database the client is using (c->db->io_keys). 
 * 
 * Returns 0 right away when 'key' is not found in c->db->dict. */ 
10153 static int waitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
10154     struct dictEntry 
*de
; 
10158     /* If the key does not exist or is already in RAM we don't need to 
10159      * block the client at all. */ 
10160     de 
= dictFind(c
->db
->dict
,key
->ptr
); 
10161     if (de 
== NULL
) return 0; 
10162     o 
= dictGetEntryVal(de
); 
10163     if (o
->storage 
== REDIS_VM_MEMORY
) { 
10165     } else if (o
->storage 
== REDIS_VM_SWAPPING
) { 
10166         /* We were swapping the key, undo it! */ 
10167         vmCancelThreadedIOJob(o
); 
10171     /* OK: the key is either swapped, or being loaded just now. */ 
10173     /* Add the key to the list of keys this client is waiting for. 
10174      * This maps clients to keys they are waiting for. */ 
10175     listAddNodeTail(c
->io_keys
,key
); 
10178     /* Add the client to the swapped keys => clients waiting map. */ 
10179     de 
= dictFind(c
->db
->io_keys
,key
); 
10183         /* For every key we take a list of clients blocked for it */ 
10185         retval 
= dictAdd(c
->db
->io_keys
,key
,l
); 
10187         assert(retval 
== DICT_OK
); 
10189         l 
= dictGetEntryVal(de
); 
10191     listAddNodeTail(l
,c
); 
10193     /* Are we already loading the key from disk? If not create a job */ 
10194     if (o
->storage 
== REDIS_VM_SWAPPED
) { 
/* The swapped object is viewed as a vmpointer so the load job can record 
 * the swap page (vp->page) it has to read. */ 
10196         vmpointer 
*vp 
= (vmpointer
*)o
; 
/* Mark the object as loading before the job is handed over. */ 
10198         o
->storage 
= REDIS_VM_LOADING
; 
10199         j 
= zmalloc(sizeof(*j
)); 
10200         j
->type 
= REDIS_IOJOB_LOAD
; 
10205         j
->page 
= vp
->page
; 
/* (pthread_t) -1 is the initial thread id stored in the new job. */ 
10208         j
->thread 
= (pthread_t
) -1; 
10211         unlockThreadedIO(); 
10216 /* Preload keys for any command with first, last and step values for 
10217  * the command keys prototype, as defined in the command table. 
 * 
 * Nothing is done when cmd->vm_firstkey is 0. A negative cmd->vm_lastkey is 
 * taken relative to the end of the argument vector (argc+last). Every 
 * argv[j] from vm_firstkey up to and including 'last', advancing by 
 * vm_keystep, is handed to waitForSwappedKey(); each index is asserted to 
 * be below argc. */ 
10218 static void waitForMultipleSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10220     if (cmd
->vm_firstkey 
== 0) return; 
10221     last 
= cmd
->vm_lastkey
; 
10222     if (last 
< 0) last 
= argc
+last
; 
10223     for (j 
= cmd
->vm_firstkey
; j 
<= last
; j 
+= cmd
->vm_keystep
) { 
10224         redisAssert(j 
< argc
); 
10225         waitForSwappedKey(c
,argv
[j
]); 
10229 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands. 
10230  * Note that the number of keys to preload is user-defined, so we need to 
10231  * apply a sanity check against argc. 
 * 
 * 'cmd' is not used. The key count is parsed from argv[2] with atoi(); when 
 * it is larger than argc-3 the function returns without preloading anything. 
 * Otherwise the 'num' arguments starting at argv[3] are passed to 
 * waitForSwappedKey(). */ 
10232 static void zunionInterBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10234     REDIS_NOTUSED(cmd
); 
10236     num 
= atoi(argv
[2]->ptr
); 
10237     if (num 
> (argc
-3)) return; 
10238     for (i 
= 0; i 
< num
; i
++) { 
10239         waitForSwappedKey(c
,argv
[3+i
]); 
10243 /* Preload keys needed to execute the entire MULTI/EXEC block. 
10245  * This function is called by blockClientOnSwappedKeys when EXEC is issued, 
10246  * and will block the client when any command requires a swapped out value. 
 * 
 * 'cmd', 'argc' and 'argv' are unused: the commands to inspect are the ones 
 * queued in c->mstate. The function returns without doing anything when the 
 * client does not have the REDIS_MULTI flag set. */ 
10247 static void execBlockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
, int argc
, robj 
**argv
) { 
10249     struct redisCommand 
*mcmd
; 
10251     REDIS_NOTUSED(cmd
); 
10252     REDIS_NOTUSED(argc
); 
10253     REDIS_NOTUSED(argv
); 
10255     if (!(c
->flags 
& REDIS_MULTI
)) return; 
10256     for (i 
= 0; i 
< c
->mstate
.count
; i
++) { 
/* Command, argument count and argument vector of the i-th queued command. */ 
10257         mcmd 
= c
->mstate
.commands
[i
].cmd
; 
10258         margc 
= c
->mstate
.commands
[i
].argc
; 
10259         margv 
= c
->mstate
.commands
[i
].argv
; 
/* A command-specific preload procedure is used when the command has one; 
 * NOTE(review): the generic waitForMultipleSwappedKeys() call below is 
 * presumably the fallback branch - confirm against the full source. */ 
10261         if (mcmd
->vm_preload_proc 
!= NULL
) { 
10262             mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
); 
10264             waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
); 
10269 /* Is this client attempting to run a command against swapped keys? 
10270  * If so, block it ASAP, load the keys in background, then resume it. 
10272  * The important idea about this function is that it can fail! If keys are 
10273  * still swapped when the client is resumed, its key lookups will 
10274  * just block loading keys from disk. In practical terms this should only 
10275  * happen with SORT BY command or if there is a bug in this function. 
10277  * Return 1 if the client is marked as blocked, 0 if the client can 
10278  * continue as the keys it is going to access appear to be in memory. 
 * 
 * Marking the client as blocked means: setting REDIS_IO_WAIT in c->flags, 
 * removing the AE_READABLE file event for c->fd from server.el, and 
 * incrementing server.vm_blocked_clients. */ 
10279 static int blockClientOnSwappedKeys(redisClient 
*c
, struct redisCommand 
*cmd
) { 
10280     if (cmd
->vm_preload_proc 
!= NULL
) { 
10281         cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
); 
10283         waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
); 
10286     /* If the client was blocked for at least one key, mark it as blocked. */ 
10287     if (listLength(c
->io_keys
)) { 
10288         c
->flags 
|= REDIS_IO_WAIT
; 
10289         aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
); 
10290         server
.vm_blocked_clients
++; 
10297 /* Remove the 'key' from the list of blocked keys for a given client. 
10299  * The function returns 1 when there are no longer blocking keys after 
10300  * the current one was removed (and the client can be unblocked). 
 * 
 * Two structures are updated: the per-client list c->io_keys and the list of 
 * waiting clients stored under 'key' in c->db->io_keys. The function asserts 
 * that the key and the client are found in both places. */ 
10301 static int dontWaitForSwappedKey(redisClient 
*c
, robj 
*key
) { 
10305     struct dictEntry 
*de
; 
10307     /* Remove the key from the list of keys this client is waiting for. */ 
10308     listRewind(c
->io_keys
,&li
); 
10309     while ((ln 
= listNext(&li
)) != NULL
) { 
10310         if (equalStringObjects(ln
->value
,key
)) { 
10311             listDelNode(c
->io_keys
,ln
); 
10315     assert(ln 
!= NULL
); 
10317     /* Remove the client from the key => waiting clients map. */ 
10318     de 
= dictFind(c
->db
->io_keys
,key
); 
10319     assert(de 
!= NULL
); 
10320     l 
= dictGetEntryVal(de
); 
10321     ln 
= listSearchKey(l
,c
); 
10322     assert(ln 
!= NULL
); 
/* Once no client waits for this key anymore, drop the dictionary entry. */ 
10324     if (listLength(l
) == 0) 
10325         dictDelete(c
->db
->io_keys
,key
); 
10327     return listLength(c
->io_keys
) == 0; 
10330 /* Every time we know a key was loaded back in memory, we handle clients 
10331  * waiting for this key if any. 
 * 
 * The waiting clients are looked up in db->io_keys. Each one stops waiting 
 * for 'key' via dontWaitForSwappedKey(); a client with no other pending key 
 * is appended to server.io_ready_clients. */ 
10332 static void handleClientsBlockedOnSwappedKey(redisDb 
*db
, robj 
*key
) { 
10333     struct dictEntry 
*de
; 
10338     de 
= dictFind(db
->io_keys
,key
); 
10341     l 
= dictGetEntryVal(de
); 
/* The length is sampled once, before any client is removed (see the note 
 * right below). */ 
10342     len 
= listLength(l
); 
10343     /* Note: we can't use something like while(listLength(l)) as the list 
10344      * can be freed by the called function when we remove the last element. */ 
10347         redisClient 
*c 
= ln
->value
; 
10349         if (dontWaitForSwappedKey(c
,key
)) { 
10350             /* Put the client in the list of clients ready to go as we 
10351              * loaded all the keys about it. */ 
10352             listAddNodeTail(server
.io_ready_clients
,c
); 
10357 /* =========================== Remote Configuration ========================= */ 
/* CONFIG SET <parameter> <value>. 
 * 
 * argv[2] is the parameter name, compared case-insensitively; argv[3] is the 
 * new value, accessed through its decoded form 'o'. Parameters handled here: 
 * dbfilename, requirepass, masterauth, maxmemory, timeout, appendfsync, 
 * no-appendfsync-on-rewrite, appendonly and save. 
 * 
 * Replies: shared.ok after a parameter was set, 
 * "-ERR not supported CONFIG parameter ..." for an unknown name, and the 
 * 'badfmt' error reply when the value cannot be parsed or is out of range. */ 
10359 static void configSetCommand(redisClient 
*c
) { 
10360     robj 
*o 
= getDecodedObject(c
->argv
[3]); 
10363     if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) { 
10364         zfree(server
.dbfilename
); 
10365         server
.dbfilename 
= zstrdup(o
->ptr
); 
10366     } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) { 
10367         zfree(server
.requirepass
); 
10368         server
.requirepass 
= zstrdup(o
->ptr
); 
10369     } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) { 
10370         zfree(server
.masterauth
); 
10371         server
.masterauth 
= zstrdup(o
->ptr
); 
10372     } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) { 
/* maxmemory must parse as an integer and must not be negative. */ 
10373         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
10374             ll 
< 0) goto badfmt
; 
10375         server
.maxmemory 
= ll
; 
10376     } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) { 
/* timeout must be an integer in the range 0..LONG_MAX. */ 
10377         if (getLongLongFromObject(o
,&ll
) == REDIS_ERR 
|| 
10378             ll 
< 0 || ll 
> LONG_MAX
) goto badfmt
; 
10379         server
.maxidletime 
= ll
; 
10380     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) { 
/* Accepted values (case-insensitive): no, everysec, always. */ 
10381         if (!strcasecmp(o
->ptr
,"no")) { 
10382             server
.appendfsync 
= APPENDFSYNC_NO
; 
10383         } else if (!strcasecmp(o
->ptr
,"everysec")) { 
10384             server
.appendfsync 
= APPENDFSYNC_EVERYSEC
; 
10385         } else if (!strcasecmp(o
->ptr
,"always")) { 
10386             server
.appendfsync 
= APPENDFSYNC_ALWAYS
; 
10390     } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) { 
/* yesnotoi() yields -1 for a value that is neither yes nor no. */ 
10391         int yn 
= yesnotoi(o
->ptr
); 
10393         if (yn 
== -1) goto badfmt
; 
10394         server
.no_appendfsync_on_rewrite 
= yn
; 
10395     } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) { 
10396         int old 
= server
.appendonly
; 
10397         int new = yesnotoi(o
->ptr
); 
10399         if (new == -1) goto badfmt
; 
/* A failure of startAppendOnly() is reported to the client as an error. */ 
10404                 if (startAppendOnly() == REDIS_ERR
) { 
10405                     addReplySds(c
,sdscatprintf(sdsempty(), 
10406                         "-ERR Unable to turn on AOF. Check server logs.\r\n")); 
10412     } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) { 
/* The value is a space separated list: seconds changes [seconds changes ...] */ 
10414         sds 
*v 
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
); 
10416         /* Perform sanity check before setting the new config: 
10417          * - Even number of args 
10418          * - Seconds >= 1, changes >= 0 */ 
10420             sdsfreesplitres(v
,vlen
); 
10423         for (j 
= 0; j 
< vlen
; j
++) { 
/* Even positions hold seconds, odd positions hold changes; every token 
 * must be fully consumed by strtoll(). */ 
10427             val 
= strtoll(v
[j
], &eptr
, 10); 
10428             if (eptr
[0] != '\0' || 
10429                 ((j 
& 1) == 0 && val 
< 1) || 
10430                 ((j 
& 1) == 1 && val 
< 0)) { 
10431                 sdsfreesplitres(v
,vlen
); 
10435         /* Finally set the new config */ 
10436         resetServerSaveParams(); 
10437         for (j 
= 0; j 
< vlen
; j 
+= 2) { 
10441             seconds 
= strtoll(v
[j
],NULL
,10); 
10442             changes 
= strtoll(v
[j
+1],NULL
,10); 
10443             appendServerSaveParams(seconds
, changes
); 
10445         sdsfreesplitres(v
,vlen
); 
10447         addReplySds(c
,sdscatprintf(sdsempty(), 
10448             "-ERR not supported CONFIG parameter %s\r\n", 
10449             (char*)c
->argv
[2]->ptr
)); 
10454     addReply(c
,shared
.ok
); 
10457 badfmt
: /* Bad format errors */ 
10458     addReplySds(c
,sdscatprintf(sdsempty(), 
10459         "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n", 
10461             (char*)c
->argv
[2]->ptr
)); 
/* CONFIG GET <pattern>. 
 * 
 * argv[2] (decoded) is matched with stringmatch() against the name of every 
 * parameter known to this function: dbfilename, requirepass, masterauth, 
 * maxmemory, timeout, appendonly, no-appendfsync-on-rewrite, appendfsync and 
 * save. For each match the parameter name and its current value are added to 
 * the reply as two bulk strings. 
 * 
 * The reply header is the object 'lenobj': it is queued before any value 
 * with a NULL ptr, and its ptr is set at the end to "*<matches*2>\r\n". */ 
10465 static void configGetCommand(redisClient 
*c
) { 
10466     robj 
*o 
= getDecodedObject(c
->argv
[2]); 
10467     robj 
*lenobj 
= createObject(REDIS_STRING
,NULL
); 
10468     char *pattern 
= o
->ptr
; 
10471     addReply(c
,lenobj
); 
10472     decrRefCount(lenobj
); 
10474     if (stringmatch(pattern
,"dbfilename",0)) { 
10475         addReplyBulkCString(c
,"dbfilename"); 
10476         addReplyBulkCString(c
,server
.dbfilename
); 
10479     if (stringmatch(pattern
,"requirepass",0)) { 
10480         addReplyBulkCString(c
,"requirepass"); 
10481         addReplyBulkCString(c
,server
.requirepass
); 
10484     if (stringmatch(pattern
,"masterauth",0)) { 
10485         addReplyBulkCString(c
,"masterauth"); 
10486         addReplyBulkCString(c
,server
.masterauth
); 
10489     if (stringmatch(pattern
,"maxmemory",0)) { 
/* Numeric values are rendered into a local buffer with ll2string(). */ 
10492         ll2string(buf
,128,server
.maxmemory
); 
10493         addReplyBulkCString(c
,"maxmemory"); 
10494         addReplyBulkCString(c
,buf
); 
10497     if (stringmatch(pattern
,"timeout",0)) { 
10500         ll2string(buf
,128,server
.maxidletime
); 
10501         addReplyBulkCString(c
,"timeout"); 
10502         addReplyBulkCString(c
,buf
); 
10505     if (stringmatch(pattern
,"appendonly",0)) { 
10506         addReplyBulkCString(c
,"appendonly"); 
10507         addReplyBulkCString(c
,server
.appendonly 
? "yes" : "no"); 
10510     if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) { 
10511         addReplyBulkCString(c
,"no-appendfsync-on-rewrite"); 
10512         addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite 
? "yes" : "no"); 
10515     if (stringmatch(pattern
,"appendfsync",0)) { 
/* Map the numeric fsync policy back to its configuration keyword. */ 
10518         switch(server
.appendfsync
) { 
10519         case APPENDFSYNC_NO
: policy 
= "no"; break; 
10520         case APPENDFSYNC_EVERYSEC
: policy 
= "everysec"; break; 
10521         case APPENDFSYNC_ALWAYS
: policy 
= "always"; break; 
10522         default: policy 
= "unknown"; break; /* too harmless to panic */ 
10524         addReplyBulkCString(c
,"appendfsync"); 
10525         addReplyBulkCString(c
,policy
); 
10528     if (stringmatch(pattern
,"save",0)) { 
/* The save points are joined as "seconds changes" pairs separated by a 
 * single space, with no trailing space after the last pair. */ 
10529         sds buf 
= sdsempty(); 
10532         for (j 
= 0; j 
< server
.saveparamslen
; j
++) { 
10533             buf 
= sdscatprintf(buf
,"%ld %d", 
10534                     server
.saveparams
[j
].seconds
, 
10535                     server
.saveparams
[j
].changes
); 
10536             if (j 
!= server
.saveparamslen
-1) 
10537                 buf 
= sdscatlen(buf
," ",1); 
10539         addReplyBulkCString(c
,"save"); 
10540         addReplyBulkCString(c
,buf
); 
/* Two reply elements (name and value) were emitted for every match. */ 
10545     lenobj
->ptr 
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2); 
/* CONFIG command entry point: dispatches on the subcommand in argv[1] 
 * (case-insensitive). 
 * 
 *   SET       requires argc == 4, handled by configSetCommand() 
 *   GET       requires argc == 3, handled by configGetCommand() 
 *   RESETSTAT requires argc == 2; zeroes stat_numcommands, 
 *             stat_numconnections and stat_expiredkeys, sets stat_starttime 
 *             to the current time and replies OK 
 * 
 * Any other subcommand gets an error listing the valid ones; a wrong 
 * argument count gets the "Wrong number of arguments" error. */ 
10548 static void configCommand(redisClient 
*c
) { 
10549     if (!strcasecmp(c
->argv
[1]->ptr
,"set")) { 
10550         if (c
->argc 
!= 4) goto badarity
; 
10551         configSetCommand(c
); 
10552     } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) { 
10553         if (c
->argc 
!= 3) goto badarity
; 
10554         configGetCommand(c
); 
10555     } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) { 
10556         if (c
->argc 
!= 2) goto badarity
; 
10557         server
.stat_numcommands 
= 0; 
10558         server
.stat_numconnections 
= 0; 
10559         server
.stat_expiredkeys 
= 0; 
10560         server
.stat_starttime 
= time(NULL
); 
10561         addReply(c
,shared
.ok
); 
10563         addReplySds(c
,sdscatprintf(sdsempty(), 
10564             "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n")); 
10569     addReplySds(c
,sdscatprintf(sdsempty(), 
10570         "-ERR Wrong number of arguments for CONFIG %s\r\n", 
10571         (char*) c
->argv
[1]->ptr
)); 
10574 /* =========================== Pubsub implementation ======================== */ 
/* Release a pubsubPattern passed as an untyped pointer: the reference held 
 * on its pattern object is dropped. 
 * NOTE(review): the void* signature suggests use as a list free method - 
 * confirm where it is registered. */ 
10576 static void freePubsubPattern(void *p
) { 
10577     pubsubPattern 
*pat 
= p
; 
10579     decrRefCount(pat
->pattern
); 
/* Compare two pubsubPattern entries passed as untyped pointers. Returns 
 * non-zero when both refer to the same client and their pattern objects 
 * are equal strings. */ 
10583 static int listMatchPubsubPattern(void *a
, void *b
) { 
10584     pubsubPattern 
*pa 
= a
, *pb 
= b
; 
10586     return (pa
->client 
== pb
->client
) && 
10587            (equalStringObjects(pa
->pattern
,pb
->pattern
)); 
10590 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or 
10591  * 0 if the client was already subscribed to that channel. 
 * 
 * Two tables are kept in sync: c->pubsub_channels (channels of this client) 
 * and server.pubsub_channels (channel -> list of subscribed clients). A 
 * reference to 'channel' is taken for every table it is inserted into. 
 * 
 * The client is always notified with a three element reply: the subscribe 
 * marker, the channel name, and the total number of channels plus patterns 
 * the client is subscribed to. */ 
10592 static int pubsubSubscribeChannel(redisClient 
*c
, robj 
*channel
) { 
10593     struct dictEntry 
*de
; 
10594     list 
*clients 
= NULL
; 
10597     /* Add the channel to the client -> channels hash table */ 
10598     if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) { 
10600         incrRefCount(channel
); 
10601         /* Add the client to the channel -> list of clients hash table */ 
10602         de 
= dictFind(server
.pubsub_channels
,channel
); 
10604             clients 
= listCreate(); 
10605             dictAdd(server
.pubsub_channels
,channel
,clients
); 
10606             incrRefCount(channel
); 
10608             clients 
= dictGetEntryVal(de
); 
10610         listAddNodeTail(clients
,c
); 
10612     /* Notify the client */ 
10613     addReply(c
,shared
.mbulk3
); 
10614     addReply(c
,shared
.subscribebulk
); 
10615     addReplyBulk(c
,channel
); 
10616     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10620 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or 
10621  * 0 if the client was not subscribed to the specified channel. 
 * 
 * 'notify' selects whether the unsubscribe reply (marker, channel name, 
 * remaining number of channel plus pattern subscriptions) is sent to the 
 * client. */ 
10622 static int pubsubUnsubscribeChannel(redisClient 
*c
, robj 
*channel
, int notify
) { 
10623     struct dictEntry 
*de
; 
10628     /* Remove the channel from the client -> channels hash table */ 
10629     incrRefCount(channel
); /* channel may be just a pointer to the same object 
10630                             we have in the hash tables. Protect it... */ 
10631     if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) { 
10633         /* Remove the client from the channel -> clients list hash table */ 
10634         de 
= dictFind(server
.pubsub_channels
,channel
); 
10635         assert(de 
!= NULL
); 
10636         clients 
= dictGetEntryVal(de
); 
10637         ln 
= listSearchKey(clients
,c
); 
10638         assert(ln 
!= NULL
); 
10639         listDelNode(clients
,ln
); 
10640         if (listLength(clients
) == 0) { 
10641             /* Free the list and the associated hash entry altogether if this 
10642              * was the last client, so that it will not be possible to abuse 
10643              * Redis PUBSUB by creating millions of channels. */ 
10644             dictDelete(server
.pubsub_channels
,channel
); 
10647     /* Notify the client */ 
10649         addReply(c
,shared
.mbulk3
); 
10650         addReply(c
,shared
.unsubscribebulk
); 
10651         addReplyBulk(c
,channel
); 
10652         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10653                        listLength(c
->pubsub_patterns
)); 
10656     decrRefCount(channel
); /* it is finally safe to release it */ 
10660 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. 
 * 
 * A new pattern is appended to the client list c->pubsub_patterns (taking a 
 * reference) and a freshly allocated pubsubPattern, holding the decoded 
 * pattern object, is appended to the global list server.pubsub_patterns. 
 * The client is always notified with the psubscribe marker, the pattern and 
 * its total number of channel plus pattern subscriptions. */ 
10661 static int pubsubSubscribePattern(redisClient 
*c
, robj 
*pattern
) { 
10664     if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) { 
10666         pubsubPattern 
*pat
; 
10667         listAddNodeTail(c
->pubsub_patterns
,pattern
); 
10668         incrRefCount(pattern
); 
10669         pat 
= zmalloc(sizeof(*pat
)); 
10670         pat
->pattern 
= getDecodedObject(pattern
); 
10672         listAddNodeTail(server
.pubsub_patterns
,pat
); 
10674     /* Notify the client */ 
10675     addReply(c
,shared
.mbulk3
); 
10676     addReply(c
,shared
.psubscribebulk
); 
10677     addReplyBulk(c
,pattern
); 
10678     addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
)); 
10682 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or 
10683  * 0 if the client was not subscribed to the specified pattern. 
 * 
 * The pattern is removed from the client list c->pubsub_patterns and the 
 * matching entry is removed from the global list server.pubsub_patterns 
 * (looked up through a temporary pubsubPattern used as search key). 
 * 'notify' selects whether the punsubscribe reply is sent to the client. */ 
10684 static int pubsubUnsubscribePattern(redisClient 
*c
, robj 
*pattern
, int notify
) { 
10689     incrRefCount(pattern
); /* Protect the object. May be the same we remove */ 
10690     if ((ln 
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) { 
10692         listDelNode(c
->pubsub_patterns
,ln
); 
10694         pat
.pattern 
= pattern
; 
10695         ln 
= listSearchKey(server
.pubsub_patterns
,&pat
); 
10696         listDelNode(server
.pubsub_patterns
,ln
); 
10698     /* Notify the client */ 
10700         addReply(c
,shared
.mbulk3
); 
10701         addReply(c
,shared
.punsubscribebulk
); 
10702         addReplyBulk(c
,pattern
); 
10703         addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+ 
10704                        listLength(c
->pubsub_patterns
)); 
10706     decrRefCount(pattern
); 
10710 /* Unsubscribe from all the channels. Return the number of channels the 
10711  * client was subscribed to. 
 * 
 * Every key of c->pubsub_channels is passed to pubsubUnsubscribeChannel() 
 * together with 'notify'; the return values are summed into the count. */ 
10712 static int pubsubUnsubscribeAllChannels(redisClient 
*c
, int notify
) { 
10713     dictIterator 
*di 
= dictGetIterator(c
->pubsub_channels
); 
10717     while((de 
= dictNext(di
)) != NULL
) { 
10718         robj 
*channel 
= dictGetEntryKey(de
); 
10720         count 
+= pubsubUnsubscribeChannel(c
,channel
,notify
); 
10722     dictReleaseIterator(di
); 
10726 /* Unsubscribe from all the patterns. Return the number of patterns the 
10727  * client was subscribed to. 
 * 
 * Every element of c->pubsub_patterns is passed to 
 * pubsubUnsubscribePattern() together with 'notify'; the return values are 
 * summed into the count. */ 
10728 static int pubsubUnsubscribeAllPatterns(redisClient 
*c
, int notify
) { 
10733     listRewind(c
->pubsub_patterns
,&li
); 
10734     while ((ln 
= listNext(&li
)) != NULL
) { 
10735         robj 
*pattern 
= ln
->value
; 
10737         count 
+= pubsubUnsubscribePattern(c
,pattern
,notify
); 
10742 /* Publish a message */ 
/* Deliver 'message' published on 'channel'. 
 * 
 * Clients found in server.pubsub_channels for this channel receive three 
 * elements: the message marker, the channel and the message. Clients owning 
 * a pattern in server.pubsub_patterns that matches the channel name (glob 
 * match via stringmatchlen) receive four elements: the pmessage marker, the 
 * pattern, the channel and the message. 
 * 
 * The int result is what PUBLISH reports to the caller as receivers count. */ 
10743 static int pubsubPublishMessage(robj 
*channel
, robj 
*message
) { 
10745     struct dictEntry 
*de
; 
10749     /* Send to clients listening for that channel */ 
10750     de 
= dictFind(server
.pubsub_channels
,channel
); 
10752         list 
*list 
= dictGetEntryVal(de
); 
10756         listRewind(list
,&li
); 
10757         while ((ln 
= listNext(&li
)) != NULL
) { 
10758             redisClient 
*c 
= ln
->value
; 
10760             addReply(c
,shared
.mbulk3
); 
10761             addReply(c
,shared
.messagebulk
); 
10762             addReplyBulk(c
,channel
); 
10763             addReplyBulk(c
,message
); 
10767     /* Send to clients listening to matching channels */ 
10768     if (listLength(server
.pubsub_patterns
)) { 
10769         listRewind(server
.pubsub_patterns
,&li
); 
/* The channel is decoded once so its string bytes can be matched against 
 * every pattern; the extra reference is dropped after the loop. */ 
10770         channel 
= getDecodedObject(channel
); 
10771         while ((ln 
= listNext(&li
)) != NULL
) { 
10772             pubsubPattern 
*pat 
= ln
->value
; 
10774             if (stringmatchlen((char*)pat
->pattern
->ptr
, 
10775                                 sdslen(pat
->pattern
->ptr
), 
10776                                 (char*)channel
->ptr
, 
10777                                 sdslen(channel
->ptr
),0)) { 
10778                 addReply(pat
->client
,shared
.mbulk4
); 
10779                 addReply(pat
->client
,shared
.pmessagebulk
); 
10780                 addReplyBulk(pat
->client
,pat
->pattern
); 
10781                 addReplyBulk(pat
->client
,channel
); 
10782                 addReplyBulk(pat
->client
,message
); 
10786         decrRefCount(channel
); 
/* SUBSCRIBE channel [channel ...]: subscribes the client to every channel 
 * given in argv[1..argc-1] through pubsubSubscribeChannel(). */ 
10791 static void subscribeCommand(redisClient 
*c
) { 
10794     for (j 
= 1; j 
< c
->argc
; j
++) 
10795         pubsubSubscribeChannel(c
,c
->argv
[j
]); 
/* UNSUBSCRIBE [channel ...]: with no arguments the client is unsubscribed 
 * from all its channels; otherwise from each channel in argv[1..argc-1]. 
 * Notification of the client is requested in both cases (notify = 1). */ 
10798 static void unsubscribeCommand(redisClient 
*c
) { 
10799     if (c
->argc 
== 1) { 
10800         pubsubUnsubscribeAllChannels(c
,1); 
10805         for (j 
= 1; j 
< c
->argc
; j
++) 
10806             pubsubUnsubscribeChannel(c
,c
->argv
[j
],1); 
/* PSUBSCRIBE pattern [pattern ...]: subscribes the client to every pattern 
 * given in argv[1..argc-1] through pubsubSubscribePattern(). */ 
10810 static void psubscribeCommand(redisClient 
*c
) { 
10813     for (j 
= 1; j 
< c
->argc
; j
++) 
10814         pubsubSubscribePattern(c
,c
->argv
[j
]); 
/* PUNSUBSCRIBE [pattern ...]: with no arguments the client is unsubscribed 
 * from all its patterns; otherwise from each pattern in argv[1..argc-1]. 
 * Notification of the client is requested in both cases (notify = 1). */ 
10817 static void punsubscribeCommand(redisClient 
*c
) { 
10818     if (c
->argc 
== 1) { 
10819         pubsubUnsubscribeAllPatterns(c
,1); 
10824         for (j 
= 1; j 
< c
->argc
; j
++) 
10825             pubsubUnsubscribePattern(c
,c
->argv
[j
],1); 
/* PUBLISH channel message: publishes argv[2] on channel argv[1] and replies 
 * with the integer returned by pubsubPublishMessage(). */ 
10829 static void publishCommand(redisClient 
*c
) { 
10830     int receivers 
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]); 
10831     addReplyLongLong(c
,receivers
); 
10834 /* ===================== WATCH (CAS alike for MULTI/EXEC) =================== 
10836  * The implementation uses a per-DB hash table mapping keys to list of clients 
10837  * WATCHing those keys, so that given a key that is going to be modified 
10838  * we can mark all the associated clients as dirty. 
10840  * Also every client contains a list of WATCHed keys so that it's possible to 
10841  * un-watch such keys when the client is freed or when UNWATCH is called. */ 
10843 /* In the client->watched_keys list we need to use watchedKey structures 
10844  * as in order to identify a key in Redis we need both the key name and the 
10846 typedef struct watchedKey 
{ 
10851 /* Watch for the specified key. 
 * 
 * Nothing is done when the client already watches 'key' in its current 
 * database. Otherwise the client is appended to the list of watchers kept 
 * under 'key' in c->db->watched_keys, and a new watchedKey entry is appended 
 * to c->watched_keys. */ 
10852 static void watchForKey(redisClient 
*c
, robj 
*key
) { 
10853     list 
*clients 
= NULL
; 
10858     /* Check if we are already watching for this key */ 
10859     listRewind(c
->watched_keys
,&li
); 
10860     while((ln 
= listNext(&li
))) { 
10861         wk 
= listNodeValue(ln
); 
10862         if (wk
->db 
== c
->db 
&& equalStringObjects(key
,wk
->key
)) 
10863             return; /* Key already watched */ 
10865     /* This key is not already watched in this DB. Let's add it */ 
10866     clients 
= dictFetchValue(c
->db
->watched_keys
,key
); 
10868         clients 
= listCreate(); 
10869         dictAdd(c
->db
->watched_keys
,key
,clients
); 
10872     listAddNodeTail(clients
,c
); 
10873     /* Add the new key to the list of keys watched by this client */ 
10874     wk 
= zmalloc(sizeof(*wk
)); 
10878     listAddNodeTail(c
->watched_keys
,wk
); 
10881 /* Unwatch all the keys watched by this client. Clearing the EXEC dirty 
10882  * flag is up to the caller. 
 * 
 * Returns immediately when c->watched_keys is empty. For every watched key 
 * the client is removed from the watchers list stored in the key's database 
 * (wk->db->watched_keys); that dictionary entry is deleted when the list 
 * becomes empty. The node is then removed from c->watched_keys and the 
 * reference on wk->key is released. */ 
10883 static void unwatchAllKeys(redisClient 
*c
) { 
10887     if (listLength(c
->watched_keys
) == 0) return; 
10888     listRewind(c
->watched_keys
,&li
); 
10889     while((ln 
= listNext(&li
))) { 
10893         /* Lookup the watched key -> clients list and remove the client 
10895         wk 
= listNodeValue(ln
); 
10896         clients 
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
); 
10897         assert(clients 
!= NULL
); 
10898         listDelNode(clients
,listSearchKey(clients
,c
)); 
10899         /* Kill the entry at all if this was the only client */ 
10900         if (listLength(clients
) == 0) 
10901             dictDelete(wk
->db
->watched_keys
, wk
->key
); 
10902         /* Remove this watched key from the client->watched list */ 
10903         listDelNode(c
->watched_keys
,ln
); 
10904         decrRefCount(wk
->key
); 
10909 /* "Touch" a key, so that if this key is being WATCHed by some client the 
10910  * next EXEC will fail. 
 * 
 * Returns early when no key is watched in 'db' at all, or when nobody 
 * watches this specific key. */ 
10911 static void touchWatchedKey(redisDb 
*db
, robj 
*key
) { 
10916     if (dictSize(db
->watched_keys
) == 0) return; 
10917     clients 
= dictFetchValue(db
->watched_keys
, key
); 
10918     if (!clients
) return; 
10920     /* Mark all the clients watching this key as REDIS_DIRTY_CAS */ 
10921     /* Walk the list of clients watching this key */ 
10922     listRewind(clients
,&li
); 
10923     while((ln 
= listNext(&li
))) { 
10924         redisClient 
*c 
= listNodeValue(ln
); 
10926         c
->flags 
|= REDIS_DIRTY_CAS
; 
10930 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the 
10931  * flush but will be deleted as effect of the flushing operation should 
10932  * be touched. "dbid" is the DB that's getting the flush. -1 if it is 
10933  * a FLUSHALL operation (all the DBs flushed). 
 * 
 * Implementation: every client in server.clients is visited and, for each of 
 * its watched keys whose database matches 'dbid' (or any database when dbid 
 * is -1), the client is flagged REDIS_DIRTY_CAS if the key currently exists 
 * in that database dict. The same 'ln' variable is used by both the outer 
 * and the inner loop; each loop advances through its own iterator (li1, 
 * li2). */ 
10934 static void touchWatchedKeysOnFlush(int dbid
) { 
10938     /* For every client, check all the waited keys */ 
10939     listRewind(server
.clients
,&li1
); 
10940     while((ln 
= listNext(&li1
))) { 
10941         redisClient 
*c 
= listNodeValue(ln
); 
10942         listRewind(c
->watched_keys
,&li2
); 
10943         while((ln 
= listNext(&li2
))) { 
10944             watchedKey 
*wk 
= listNodeValue(ln
); 
10946             /* For every watched key matching the specified DB, if the 
10947              * key exists, mark the client as dirty, as the key will be 
10949             if (dbid 
== -1 || wk
->db
->id 
== dbid
) { 
10950                 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
) 
10951                     c
->flags 
|= REDIS_DIRTY_CAS
; 
/* WATCH key [key ...]. 
 * 
 * Replies with an error when the client has the REDIS_MULTI flag set (WATCH 
 * is not allowed inside MULTI). Otherwise every key in argv[1..argc-1] is 
 * registered with watchForKey() and OK is replied. */ 
10957 static void watchCommand(redisClient 
*c
) { 
10960     if (c
->flags 
& REDIS_MULTI
) { 
10961         addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n")); 
10964     for (j 
= 1; j 
< c
->argc
; j
++) 
10965         watchForKey(c
,c
->argv
[j
]); 
10966     addReply(c
,shared
.ok
); 
/* UNWATCH: clears the REDIS_DIRTY_CAS flag of the client and replies OK. */ 
10969 static void unwatchCommand(redisClient 
*c
) { 
10971     c
->flags 
&= (~REDIS_DIRTY_CAS
); 
10972     addReply(c
,shared
.ok
); 
10975 /* ================================= Debugging ============================== */ 
10977 /* Compute the sha1 of string at 's' with 'len' bytes long. 
10978  * The SHA1 is then xored against the string pointed by digest. 
10979  * Since xor is commutative, this operation is used in order to 
10980  * "add" digests relative to unordered elements. 
10982  * So digest(a,b,c,d) will be the same of digest(b,a,c,d) 
 * 
 * 'digest' must point to 20 bytes (the SHA1 output size used by the loop 
 * below); it is updated in place. 'ptr' is the data to hash. */ 
10983 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
10985     unsigned char hash
[20], *s 
= ptr
; 
10989     SHA1Update(&ctx
,s
,len
); 
10990     SHA1Final(hash
,&ctx
); 
/* Fold the 20 hash bytes into the caller's digest. */ 
10992     for (j 
= 0; j 
< 20; j
++) 
10993         digest
[j
] ^= hash
[j
]; 
/* Object flavour of xorDigest(): the data is the string content of 'o', 
 * accessed through its decoded form. */ 
10996 static void xorObjectDigest(unsigned char *digest
, robj 
*o
) { 
10997     o 
= getDecodedObject(o
); 
10998     xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
11002 /* This function instead of just computing the SHA1 and xoring it 
11003  * against digest, also perform the digest of "digest" itself and 
11004  * replace the old value with the new one. 
11006  * So the final digest will be: 
11008  * digest = SHA1(digest xor SHA1(data)) 
11010  * This function is used every time we want to preserve the order so 
11011  * that digest(a,b,c,d) will be different than digest(b,c,d,a) 
11013  * Also note that mixdigest("foo") followed by mixdigest("bar") 
11014  * will lead to a different digest compared to "fo", "obar". 
/* Order-sensitive digest update: the data is first folded into 'digest' 
 * with xorDigest(), then the 20 digest bytes are hashed with SHA1 and the 
 * result is written back into 'digest'. */ 
11016 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) { 
11020     xorDigest(digest
,s
,len
); 
11022     SHA1Update(&ctx
,digest
,20); 
11023     SHA1Final(digest
,&ctx
); 
/* Object flavour of mixDigest(): the data is the string content of 'o', 
 * accessed through its decoded form. */ 
11026 static void mixObjectDigest(unsigned char *digest
, robj 
*o
) { 
11027     o 
= getDecodedObject(o
); 
11028     mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
)); 
11032 /* Compute the dataset digest. Since keys, sets elements, hashes elements 
11033  * are not ordered, we use a trick: every aggregate digest is the xor 
11034  * of the digests of their elements. This way the order will not change 
11035  * the result. For list instead we use a feedback entering the output digest 
11036  * as input in order to ensure that a different ordered list will result in 
11037  * a different digest. 
 * 
 * 'final' must point to 20 bytes and receives the result; it is zeroed 
 * first. Databases with no keys are skipped. For every key a separate 
 * 20 byte digest is built from the key name, the value type, the value 
 * content and an expire marker, and is then xored into 'final'. */ 
11038 static void computeDatasetDigest(unsigned char *final
) { 
11039     unsigned char digest
[20]; 
11041     dictIterator 
*di 
= NULL
; 
11046     memset(final
,0,20); /* Start with a clean result */ 
11048     for (j 
= 0; j 
< server
.dbnum
; j
++) { 
11049         redisDb 
*db 
= server
.db
+j
; 
11051         if (dictSize(db
->dict
) == 0) continue; 
11052         di 
= dictGetIterator(db
->dict
); 
11054         /* hash the DB id, so the same dataset moved in a different 
11055          * DB will lead to a different digest */ 
11057         mixDigest(final
,&aux
,sizeof(aux
)); 
11059         /* Iterate this DB writing every entry */ 
11060         while((de 
= dictNext(di
)) != NULL
) { 
11065             memset(digest
,0,20); /* This key-val digest */ 
11066             key 
= dictGetEntryKey(de
); 
/* A temporary string object is built from the key so it can be passed to 
 * lookupKeyRead() and getExpire(); it is released at the end of the 
 * iteration. */ 
11067             keyobj 
= createStringObject(key
,sdslen(key
)); 
11069             mixDigest(digest
,key
,sdslen(key
)); 
11071             /* Make sure the key is loaded if VM is active */ 
11072             o 
= lookupKeyRead(db
,keyobj
); 
/* The value type is mixed in using network byte order (htonl). */ 
11074             aux 
= htonl(o
->type
); 
11075             mixDigest(digest
,&aux
,sizeof(aux
)); 
11076             expiretime 
= getExpire(db
,keyobj
); 
11078             /* Save the key and associated value */ 
11079             if (o
->type 
== REDIS_STRING
) { 
11080                 mixObjectDigest(digest
,o
); 
11081             } else if (o
->type 
== REDIS_LIST
) { 
/* Lists are ordered: elements are mixed in one after the other. */ 
11082                 listTypeIterator 
*li 
= listTypeInitIterator(o
,0,REDIS_TAIL
); 
11083                 listTypeEntry entry
; 
11084                 while(listTypeNext(li
,&entry
)) { 
11085                     robj 
*eleobj 
= listTypeGet(&entry
); 
11086                     mixObjectDigest(digest
,eleobj
); 
11087                     decrRefCount(eleobj
); 
11089                 listTypeReleaseIterator(li
); 
11090             } else if (o
->type 
== REDIS_SET
) { 
/* Sets are unordered: members are xored in. */ 
11091                 dict 
*set 
= o
->ptr
; 
11092                 dictIterator 
*di 
= dictGetIterator(set
); 
11095                 while((de 
= dictNext(di
)) != NULL
) { 
11096                     robj 
*eleobj 
= dictGetEntryKey(de
); 
11098                     xorObjectDigest(digest
,eleobj
); 
11100                 dictReleaseIterator(di
); 
11101             } else if (o
->type 
== REDIS_ZSET
) { 
/* Sorted sets: each (member, score) pair gets its own digest, with the 
 * score formatted as "%.17g", and the pair digests are xored together. */ 
11103                 dictIterator 
*di 
= dictGetIterator(zs
->dict
); 
11106                 while((de 
= dictNext(di
)) != NULL
) { 
11107                     robj 
*eleobj 
= dictGetEntryKey(de
); 
11108                     double *score 
= dictGetEntryVal(de
); 
11109                     unsigned char eledigest
[20]; 
11111                     snprintf(buf
,sizeof(buf
),"%.17g",*score
); 
11112                     memset(eledigest
,0,20); 
11113                     mixObjectDigest(eledigest
,eleobj
); 
11114                     mixDigest(eledigest
,buf
,strlen(buf
)); 
11115                     xorDigest(digest
,eledigest
,20); 
11117                 dictReleaseIterator(di
); 
11118             } else if (o
->type 
== REDIS_HASH
) { 
/* Hashes: each (field, value) pair gets its own digest, field first, and 
 * the pair digests are xored together. */ 
11119                 hashTypeIterator 
*hi
; 
11122                 hi 
= hashTypeInitIterator(o
); 
11123                 while (hashTypeNext(hi
) != REDIS_ERR
) { 
11124                     unsigned char eledigest
[20]; 
11126                     memset(eledigest
,0,20); 
11127                     obj 
= hashTypeCurrent(hi
,REDIS_HASH_KEY
); 
11128                     mixObjectDigest(eledigest
,obj
); 
11130                     obj 
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
); 
11131                     mixObjectDigest(eledigest
,obj
); 
11133                     xorDigest(digest
,eledigest
,20); 
11135                 hashTypeReleaseIterator(hi
); 
11137                 redisPanic("Unknown object type"); 
11139             /* If the key has an expire, add it to the mix */ 
11140             if (expiretime 
!= -1) xorDigest(digest
,"!!expire!!",10); 
11141             /* We can finally xor the key-val digest to the final digest */ 
11142             xorDigest(final
,digest
,20); 
11143             decrRefCount(keyobj
); 
11145         dictReleaseIterator(di
); 
11149 static void debugCommand(redisClient 
*c
) { 
11150     if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) { 
11151         *((char*)-1) = 'x'; 
11152     } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) { 
11153         if (rdbSave(server
.dbfilename
) != REDIS_OK
) { 
11154             addReply(c
,shared
.err
); 
11158         if (rdbLoad(server
.dbfilename
) != REDIS_OK
) { 
11159             addReply(c
,shared
.err
); 
11162         redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD"); 
11163         addReply(c
,shared
.ok
); 
11164     } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) { 
11166         if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) { 
11167             addReply(c
,shared
.err
); 
11170         redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF"); 
11171         addReply(c
,shared
.ok
); 
11172     } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc 
== 3) { 
11173         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
); 
11177             addReply(c
,shared
.nokeyerr
); 
11180         val 
= dictGetEntryVal(de
); 
11181         if (!server
.vm_enabled 
|| (val
->storage 
== REDIS_VM_MEMORY 
|| 
11182                                    val
->storage 
== REDIS_VM_SWAPPING
)) { 
11186             if (val
->encoding 
< (sizeof(strencoding
)/sizeof(char*))) { 
11187                 strenc 
= strencoding
[val
->encoding
]; 
11189                 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
); 
11192             addReplySds(c
,sdscatprintf(sdsempty(), 
11193                 "+Value at:%p refcount:%d " 
11194                 "encoding:%s serializedlength:%lld\r\n", 
11195                 (void*)val
, val
->refcount
, 
11196                 strenc
, (long long) rdbSavedObjectLen(val
,NULL
))); 
11198             vmpointer 
*vp 
= (vmpointer
*) val
; 
11199             addReplySds(c
,sdscatprintf(sdsempty(), 
11200                 "+Value swapped at: page %llu " 
11201                 "using %llu pages\r\n", 
11202                 (unsigned long long) vp
->page
, 
11203                 (unsigned long long) vp
->usedpages
)); 
11205     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc 
== 3) { 
11206         lookupKeyRead(c
->db
,c
->argv
[2]); 
11207         addReply(c
,shared
.ok
); 
11208     } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc 
== 3) { 
11209         dictEntry 
*de 
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
); 
11213         if (!server
.vm_enabled
) { 
11214             addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n")); 
11218             addReply(c
,shared
.nokeyerr
); 
11221         val 
= dictGetEntryVal(de
); 
11223         if (val
->storage 
!= REDIS_VM_MEMORY
) { 
11224             addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n")); 
11225         } else if (val
->refcount 
!= 1) { 
11226             addReplySds(c
,sdsnew("-ERR Object is shared\r\n")); 
11227         } else if ((vp 
= vmSwapObjectBlocking(val
)) != NULL
) { 
11228             dictGetEntryVal(de
) = vp
; 
11229             addReply(c
,shared
.ok
); 
11231             addReply(c
,shared
.err
); 
11233     } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc 
== 3) { 
11238         if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
) 
11240         for (j 
= 0; j 
< keys
; j
++) { 
11241             snprintf(buf
,sizeof(buf
),"key:%lu",j
); 
11242             key 
= createStringObject(buf
,strlen(buf
)); 
11243             if (lookupKeyRead(c
->db
,key
) != NULL
) { 
11247             snprintf(buf
,sizeof(buf
),"value:%lu",j
); 
11248             val 
= createStringObject(buf
,strlen(buf
)); 
11249             dbAdd(c
->db
,key
,val
); 
11252         addReply(c
,shared
.ok
); 
11253     } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc 
== 2) { 
11254         unsigned char digest
[20]; 
11255         sds d 
= sdsnew("+"); 
11258         computeDatasetDigest(digest
); 
11259         for (j 
= 0; j 
< 20; j
++) 
11260             d 
= sdscatprintf(d
, "%02x",digest
[j
]); 
11262         d 
= sdscatlen(d
,"\r\n",2); 
11265         addReplySds(c
,sdsnew( 
11266             "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n")); 
11270 static void _redisAssert(char *estr
, char *file
, int line
) { 
11271     redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ==="); 
11272     redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
); 
11273 #ifdef HAVE_BACKTRACE 
11274     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
11275     *((char*)-1) = 'x'; 
11279 static void _redisPanic(char *msg
, char *file
, int line
) { 
11280     redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue"); 
11281     redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
); 
11282 #ifdef HAVE_BACKTRACE 
11283     redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)"); 
11284     *((char*)-1) = 'x'; 
11288 /* =================================== Main! ================================ */ 
/* Read the integer stored in /proc/sys/vm/overcommit_memory.
 *
 * Returns the parsed value, or -1 when the file cannot be opened, cannot
 * be read, or does not start with a number. Reporting -1 for unparsable
 * content matters because the caller treats 0 as "overcommit is off" and
 * warns about it: garbage must not be mistaken for a real 0. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    value = strtol(buf,&end,10);
    if (end == buf) return -1; /* no digits were consumed */
    return (int)value;
}
11305 void linuxOvercommitMemoryWarning(void) { 
11306     if (linuxOvercommitMemoryValue() == 0) { 
11307         redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); 
11310 #endif /* __linux__ */ 
11312 static void daemonize(void) { 
11316     if (fork() != 0) exit(0); /* parent exits */ 
11317     setsid(); /* create a new session */ 
11319     /* Every output goes to /dev/null. If Redis is daemonized but 
11320      * the 'logfile' is set to 'stdout' in the configuration file 
11321      * it will not log at all. */ 
11322     if ((fd 
= open("/dev/null", O_RDWR
, 0)) != -1) { 
11323         dup2(fd
, STDIN_FILENO
); 
11324         dup2(fd
, STDOUT_FILENO
); 
11325         dup2(fd
, STDERR_FILENO
); 
11326         if (fd 
> STDERR_FILENO
) close(fd
); 
11328     /* Try to write the pid file */ 
11329     fp 
= fopen(server
.pidfile
,"w"); 
11331         fprintf(fp
,"%d\n",getpid()); 
11336 static void version() { 
11337     printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
, 
11338         REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0); 
/* Print the command line synopsis on standard error, then terminate the
 * process with status 1. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n"
          "       ./redis-server - (read config from stdin)\n", stderr);
    exit(1);
}
11348 int main(int argc
, char **argv
) { 
11351     initServerConfig(); 
11352     sortCommandTable(); 
11354         if (strcmp(argv
[1], "-v") == 0 || 
11355             strcmp(argv
[1], "--version") == 0) version(); 
11356         if (strcmp(argv
[1], "--help") == 0) usage(); 
11357         resetServerSaveParams(); 
11358         loadServerConfig(argv
[1]); 
11359     } else if ((argc 
> 2)) { 
11362         redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); 
11364     if (server
.daemonize
) daemonize(); 
11366     redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
); 
11368     linuxOvercommitMemoryWarning(); 
11370     start 
= time(NULL
); 
11371     if (server
.appendonly
) { 
11372         if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
) 
11373             redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
); 
11375         if (rdbLoad(server
.dbfilename
) == REDIS_OK
) 
11376             redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
); 
11378     redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
); 
11379     aeSetBeforeSleepProc(server
.el
,beforeSleep
); 
11381     aeDeleteEventLoop(server
.el
); 
11385 /* ============================= Backtrace support ========================= */ 
11387 #ifdef HAVE_BACKTRACE 
11388 static char *findFuncName(void *pointer
, unsigned long *offset
); 
/* Return, as a void pointer, the instruction pointer member stored in the
 * machine context of 'uc'. The member is named differently on each
 * platform, so there is one preprocessor branch per supported
 * OS/architecture (FreeBSD, dietlibc, Mac OS X before and from 10.6,
 * Linux x86 32/64 bit, Linux IA64).
 * NOTE(review): the nested conditionals selecting between the __rip and
 * __eip variants, and the fallback for unknown platforms, are not
 * visible here -- confirm against the complete file. */
11390 static void *getMcontextEip(ucontext_t 
*uc
) { 
11391 #if defined(__FreeBSD__) 
11392     return (void*) uc
->uc_mcontext
.mc_eip
; 
11393 #elif defined(__dietlibc__) 
11394     return (void*) uc
->uc_mcontext
.eip
; 
11395 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6) 
11397     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
11399     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
11401 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6) 
11402   #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__) 
11403     return (void*) uc
->uc_mcontext
->__ss
.__rip
; 
11405     return (void*) uc
->uc_mcontext
->__ss
.__eip
; 
11407 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__) 
11408     return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */ 
11409 #elif defined(__ia64__) /* Linux IA64 */ 
11410     return (void*) uc
->uc_mcontext
.sc_ip
; 
11416 static void segvHandler(int sig
, siginfo_t 
*info
, void *secret
) { 
11418     char **messages 
= NULL
; 
11419     int i
, trace_size 
= 0; 
11420     unsigned long offset
=0; 
11421     ucontext_t 
*uc 
= (ucontext_t
*) secret
; 
11423     REDIS_NOTUSED(info
); 
11425     redisLog(REDIS_WARNING
, 
11426         "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
); 
11427     infostring 
= genRedisInfoString(); 
11428     redisLog(REDIS_WARNING
, "%s",infostring
); 
11429     /* It's not safe to sdsfree() the returned string under memory 
11430      * corruption conditions. Let it leak as we are going to abort */ 
11432     trace_size 
= backtrace(trace
, 100); 
11433     /* overwrite sigaction with caller's address */ 
11434     if (getMcontextEip(uc
) != NULL
) { 
11435         trace
[1] = getMcontextEip(uc
); 
11437     messages 
= backtrace_symbols(trace
, trace_size
); 
11439     for (i
=1; i
<trace_size
; ++i
) { 
11440         char *fn 
= findFuncName(trace
[i
], &offset
), *p
; 
11442         p 
= strchr(messages
[i
],'+'); 
11443         if (!fn 
|| (p 
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) { 
11444             redisLog(REDIS_WARNING
,"%s", messages
[i
]); 
11446             redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
); 
11449     /* free(messages); Don't call free() with possibly corrupted memory. */ 
11453 static void sigtermHandler(int sig
) { 
11454     REDIS_NOTUSED(sig
); 
11456     redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down..."); 
11457     server
.shutdown_asap 
= 1; 
11460 static void setupSigSegvAction(void) { 
11461     struct sigaction act
; 
11463     sigemptyset (&act
.sa_mask
); 
11464     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction 
11465      * is used. Otherwise, sa_handler is used */ 
11466     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND 
| SA_SIGINFO
; 
11467     act
.sa_sigaction 
= segvHandler
; 
11468     sigaction (SIGSEGV
, &act
, NULL
); 
11469     sigaction (SIGBUS
, &act
, NULL
); 
11470     sigaction (SIGFPE
, &act
, NULL
); 
11471     sigaction (SIGILL
, &act
, NULL
); 
11472     sigaction (SIGBUS
, &act
, NULL
); 
11474     act
.sa_flags 
= SA_NODEFER 
| SA_ONSTACK 
| SA_RESETHAND
; 
11475     act
.sa_handler 
= sigtermHandler
; 
11476     sigaction (SIGTERM
, &act
, NULL
); 
11480 #include "staticsymbols.h" 
11481 /* This function try to convert a pointer into a function name. It's used in 
11482  * oreder to provide a backtrace under segmentation fault that's able to 
11483  * display functions declared as static (otherwise the backtrace is useless). */ 
11484 static char *findFuncName(void *pointer
, unsigned long *offset
){ 
11486     unsigned long off
, minoff 
= 0; 
11488     /* Try to match against the Symbol with the smallest offset */ 
11489     for (i
=0; symsTable
[i
].pointer
; i
++) { 
11490         unsigned long lp 
= (unsigned long) pointer
; 
11492         if (lp 
!= (unsigned long)-1 && lp 
>= symsTable
[i
].pointer
) { 
11493             off
=lp
-symsTable
[i
].pointer
; 
11494             if (ret 
< 0 || off 
< minoff
) { 
11500     if (ret 
== -1) return NULL
; 
11502     return symsTable
[ret
].name
; 
11504 #else /* HAVE_BACKTRACE */ 
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined (this is the #else branch of that conditional).
 * NOTE(review): no statements of its body are visible here; presumably it
 * is an empty stub that installs no handlers -- confirm. */
11505 static void setupSigSegvAction(void) { 
11507 #endif /* HAVE_BACKTRACE */